diff --git a/.mailmap b/.mailmap index 1469ff0d3f4d55dde07c676e4c4fc29f432d20f3..e18cab73e209a7b3057cfd672356dab6d9846483 100644 --- a/.mailmap +++ b/.mailmap @@ -107,6 +107,7 @@ Linus Lüssing Maciej W. Rozycki Marcin Nowakowski Mark Brown +Mark Yao Martin Kepplinger Martin Kepplinger Matthieu CASTET diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index d6d862db3b5d65fe09c26783298bba21ceec5558..bfd29bc8d37af1bbc4913deee9d941b1692bedda 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -375,3 +375,19 @@ Contact: Linux kernel mailing list Description: information about CPUs heterogeneity. cpu_capacity: capacity of cpu#. + +What: /sys/devices/system/cpu/vulnerabilities + /sys/devices/system/cpu/vulnerabilities/meltdown + /sys/devices/system/cpu/vulnerabilities/spectre_v1 + /sys/devices/system/cpu/vulnerabilities/spectre_v2 +Date: January 2018 +Contact: Linux kernel mailing list +Description: Information about CPU vulnerabilities + + The files are named after the code names of CPU + vulnerabilities. The output of those files reflects the + state of the CPUs in the system. Possible output values: + + "Not affected" CPU is not affected by the vulnerability + "Vulnerable" CPU is affected and no mitigation in effect + "Mitigation: $M" CPU is affected and mitigation $M is in effect diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index b2598cc9834c3cbacfd9b3b1d885ed69c4c4c133..7242cbda15dd3bf8c4debea387ff2c8e376aeb60 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -109,6 +109,7 @@ parameter is applicable:: IPV6 IPv6 support is enabled. ISAPNP ISA PnP code is enabled. ISDN Appropriate ISDN support is enabled. + ISOL CPU Isolation is enabled. JOY Appropriate joystick support is enabled. KGDB Kernel debugger support is enabled. KVM Kernel Virtual Machine support is enabled. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6571fbfdb2a1527c25b3a01e9c4228c84adce639..46b26bfee27ba81267703a1ceac7c0e082f1e5ec 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -328,11 +328,15 @@ not play well with APC CPU idle - disable it if you have APC and your system crashes randomly. - apic= [APIC,X86-32] Advanced Programmable Interrupt Controller + apic= [APIC,X86] Advanced Programmable Interrupt Controller Change the output verbosity whilst booting Format: { quiet (default) | verbose | debug } Change the amount of debugging information output when initialising the APIC and IO-APIC components. + For X86-32, this can also be used to specify an APIC + driver name. + Format: apic=driver_name + Examples: apic=bigsmp apic_extnmi= [APIC,X86] External NMI delivery setting Format: { bsp (default) | all | none } @@ -709,9 +713,6 @@ It will be ignored when crashkernel=X,high is not used or memory reserved is below 4G. - crossrelease_fullstack - [KNL] Allow to record full stack trace in cross-release - cryptomgr.notests [KNL] Disable crypto self-tests @@ -1737,7 +1738,7 @@ isapnp= [ISAPNP] Format: ,,, - isolcpus= [KNL,SMP] Isolate a given set of CPUs from disturbance. + isolcpus= [KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance. [Deprecated - use cpusets instead] Format: [flag-list,] @@ -2622,6 +2623,11 @@ nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. + nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2 + (indirect branch prediction) vulnerability. System may + allow data leaks with this option, which is equivalent + to spectre_v2=off. + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. @@ -2662,7 +2668,7 @@ Valid arguments: on, off Default: on - nohz_full= [KNL,BOOT] + nohz_full= [KNL,BOOT,SMP,ISOL] The argument is a cpu list, as described above. In kernels built with CONFIG_NO_HZ_FULL=y, set the specified list of CPUs whose tick will be stopped @@ -3094,6 +3100,12 @@ pcie_scan_all Scan all possible PCIe devices. Otherwise we only look for one device below a PCIe downstream port. + big_root_window Try to add a big 64bit memory window to the PCIe + root complex on AMD CPUs. Some GFX hardware + can resize a BAR to allow access to all VRAM. + Adding the window is slightly risky (it may + conflict with unreported devices), so this + taints the kernel. pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. @@ -3282,6 +3294,21 @@ pt. [PARIDE] See Documentation/blockdev/paride.txt. + pti= [X86_64] Control Page Table Isolation of user and + kernel address spaces. Disabling this feature + removes hardening, but improves performance of + system calls and interrupts. + + on - unconditionally enable + off - unconditionally disable + auto - kernel detects whether your CPU model is + vulnerable to issues that PTI mitigates + + Not specifying this option is equivalent to pti=auto. + + nopti [X86_64] + Equivalent to pti=off + pty.legacy_count= [KNL] Number of legacy pty's. Overwrites compiled-in default number. @@ -3931,6 +3958,29 @@ sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/laptops/sonypi.txt + spectre_v2= [X86] Control mitigation of Spectre variant 2 + (indirect branch speculation) vulnerability. + + on - unconditionally enable + off - unconditionally disable + auto - kernel detects whether your CPU model is + vulnerable + + Selecting 'on' will, and 'auto' may, choose a + mitigation method at run time according to the + CPU, the available microcode, the setting of the + CONFIG_RETPOLINE configuration option, and the + compiler with which the kernel was built. + + Specific mitigations can also be selected manually: + + retpoline - replace indirect branches + retpoline,generic - google's original retpoline + retpoline,amd - AMD-specific minimal thunk + + Not specifying this option is equivalent to + spectre_v2=auto. + spia_io_base= [HW,MTD] spia_fio_base= spia_pedr= diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst index de50a8561774249351515662404e2a1f8328aba6..9b55952039a692d8119ce62502af9a4dd071b8e6 100644 --- a/Documentation/admin-guide/thunderbolt.rst +++ b/Documentation/admin-guide/thunderbolt.rst @@ -230,7 +230,7 @@ If supported by your machine this will be exposed by the WMI bus with a sysfs attribute called "force_power". For example the intel-wmi-thunderbolt driver exposes this attribute in: - /sys/devices/platform/PNP0C14:00/wmi_bus/wmi_bus-PNP0C14:00/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power + /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power To force the power to on, write 1 to this attribute file. To disable force power, write 0 to this attribute file. diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 304bf22bb83cc0ec8dfbbcf2a48b206ecb781afb..fc1c884fea10497357f889b11e33c6d323fecf55 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -75,3 +75,4 @@ stable kernels. | Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | | Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 | +| Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 | diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index 779211fbb69ffac450f22b0ad6864c7c6c2bd98f..2cddab7efb20df0dcf07c4d7d64a5611138a8d7c 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -898,6 +898,13 @@ controller implements weight and absolute bandwidth limit models for normal scheduling policy and absolute bandwidth allocation model for realtime scheduling policy. +WARNING: cgroup2 doesn't yet support control of realtime processes and +the cpu controller can only be enabled when all RT processes are in +the root cgroup. Be aware that system management software may already +have placed RT processes into nonroot cgroups during the system boot +process, and these processes may need to be moved to the root cgroup +before the cpu controller can be enabled. + CPU Interface Files ~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/core-api/genericirq.rst b/Documentation/core-api/genericirq.rst index 0054bd48be849035146239cb6efc0d7dba0c0a62..4da67b65cecfa68e536efe7a6905cc0757df0417 100644 --- a/Documentation/core-api/genericirq.rst +++ b/Documentation/core-api/genericirq.rst @@ -225,9 +225,9 @@ interrupts. The following control flow is implemented (simplified excerpt):: - :c:func:`desc->irq_data.chip->irq_mask_ack`; + desc->irq_data.chip->irq_mask_ack(); handle_irq_event(desc->action); - :c:func:`desc->irq_data.chip->irq_unmask`; + desc->irq_data.chip->irq_unmask(); Default Fast EOI IRQ flow handler @@ -239,7 +239,7 @@ which only need an EOI at the end of the handler. The following control flow is implemented (simplified excerpt):: handle_irq_event(desc->action); - :c:func:`desc->irq_data.chip->irq_eoi`; + desc->irq_data.chip->irq_eoi(); Default Edge IRQ flow handler @@ -251,15 +251,15 @@ interrupts. The following control flow is implemented (simplified excerpt):: if (desc->status & running) { - :c:func:`desc->irq_data.chip->irq_mask_ack`; + desc->irq_data.chip->irq_mask_ack(); desc->status |= pending | masked; return; } - :c:func:`desc->irq_data.chip->irq_ack`; + desc->irq_data.chip->irq_ack(); desc->status |= running; do { if (desc->status & masked) - :c:func:`desc->irq_data.chip->irq_unmask`; + desc->irq_data.chip->irq_unmask(); desc->status &= ~pending; handle_irq_event(desc->action); } while (status & pending); @@ -293,10 +293,10 @@ simplified version without locking. The following control flow is implemented (simplified excerpt):: if (desc->irq_data.chip->irq_ack) - :c:func:`desc->irq_data.chip->irq_ack`; + desc->irq_data.chip->irq_ack(); handle_irq_event(desc->action); if (desc->irq_data.chip->irq_eoi) - :c:func:`desc->irq_data.chip->irq_eoi`; + desc->irq_data.chip->irq_eoi(); EOI Edge IRQ flow handler diff --git a/Documentation/devicetree/bindings/arm/ccn.txt b/Documentation/devicetree/bindings/arm/ccn.txt index 29801456c9ee9a3701481d8c57cc149df2c312d2..43b5a71a5a9dde70aeebc38510e5a478e1d8d742 100644 --- a/Documentation/devicetree/bindings/arm/ccn.txt +++ b/Documentation/devicetree/bindings/arm/ccn.txt @@ -15,7 +15,7 @@ Required properties: Example: - ccn@0x2000000000 { + ccn@2000000000 { compatible = "arm,ccn-504"; reg = <0x20 0x00000000 0 0x1000000>; interrupts = <0 181 4>; diff --git a/Documentation/devicetree/bindings/arm/omap/crossbar.txt b/Documentation/devicetree/bindings/arm/omap/crossbar.txt index bb5727ae004ac287c6725d53ef6a4b90a9e41480..ecb360ed0e332b70a84cb139eba8fd16ca847145 100644 --- a/Documentation/devicetree/bindings/arm/omap/crossbar.txt +++ b/Documentation/devicetree/bindings/arm/omap/crossbar.txt @@ -49,7 +49,7 @@ An interrupt consumer on an SoC using crossbar will use: interrupts = Example: - device_x@0x4a023000 { + device_x@4a023000 { /* Crossbar 8 used */ interrupts = ; ... diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt index 866d93421eba20ba81fa66e905ecd58213636d95..f9632bacbd04aebd4ad68c46012777eb0c263204 100644 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt @@ -8,7 +8,7 @@ Required properties: - interrupts : Should contain MC General interrupt. Example: - memory-controller@0x7000f000 { + memory-controller@7000f000 { compatible = "nvidia,tegra20-mc"; reg = <0x7000f000 0x024 0x7000f03c 0x3c4>; diff --git a/Documentation/devicetree/bindings/clock/axi-clkgen.txt b/Documentation/devicetree/bindings/clock/axi-clkgen.txt index fb40da303d25c8f13093aafe43df83eaf2f88f47..aca94fe9416f009ef387e9f7b7880bc5b6b5127e 100644 --- a/Documentation/devicetree/bindings/clock/axi-clkgen.txt +++ b/Documentation/devicetree/bindings/clock/axi-clkgen.txt @@ -17,7 +17,7 @@ Optional properties: - clock-output-names : From common clock binding. Example: - clock@0xff000000 { + clock@ff000000 { compatible = "adi,axi-clkgen"; #clock-cells = <0>; reg = <0xff000000 0x1000>; diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt b/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt index 7a837d2182acf74e0104b4da227cc2a4715d7504..4acfc8f641b63c83e6ad9c5e2efb510b43022213 100644 --- a/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt +++ b/Documentation/devicetree/bindings/clock/brcm,bcm2835-aux-clock.txt @@ -23,7 +23,7 @@ Example: clocks = <&clk_osc>; }; - aux: aux@0x7e215004 { + aux: aux@7e215004 { compatible = "brcm,bcm2835-aux"; #clock-cells = <1>; reg = <0x7e215000 0x8>; diff --git a/Documentation/devicetree/bindings/clock/exynos4-clock.txt b/Documentation/devicetree/bindings/clock/exynos4-clock.txt index bc61c952cb0b7221ccd47c099199d06598daf91e..17bb11365354d6d6126446874d562da2dfe7e45d 100644 --- a/Documentation/devicetree/bindings/clock/exynos4-clock.txt +++ b/Documentation/devicetree/bindings/clock/exynos4-clock.txt @@ -24,7 +24,7 @@ tree sources. Example 1: An example of a clock controller node is listed below. - clock: clock-controller@0x10030000 { + clock: clock-controller@10030000 { compatible = "samsung,exynos4210-clock"; reg = <0x10030000 0x20000>; #clock-cells = <1>; diff --git a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt index 536eacd1063f88ccf2e9ef0e0bdcf06137558cf8..aff266a12eeb71bf7cf05a8cbeecbe4390f21ef4 100644 --- a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt +++ b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt @@ -22,7 +22,7 @@ tree sources. Example 1: An example of a clock controller node is listed below. - clock: clock-controller@0x10010000 { + clock: clock-controller@10010000 { compatible = "samsung,exynos5250-clock"; reg = <0x10010000 0x30000>; #clock-cells = <1>; diff --git a/Documentation/devicetree/bindings/clock/exynos5410-clock.txt b/Documentation/devicetree/bindings/clock/exynos5410-clock.txt index 4527de3ea205d176f0425f9512e6bf560a2d3ce3..c68b0d29b3d031636f827ef904a60a6794270275 100644 --- a/Documentation/devicetree/bindings/clock/exynos5410-clock.txt +++ b/Documentation/devicetree/bindings/clock/exynos5410-clock.txt @@ -30,7 +30,7 @@ Example 1: An example of a clock controller node is listed below. #clock-cells = <0>; }; - clock: clock-controller@0x10010000 { + clock: clock-controller@10010000 { compatible = "samsung,exynos5410-clock"; reg = <0x10010000 0x30000>; #clock-cells = <1>; diff --git a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt index d54f42cf0440945396e039e3e37053bef04bbc9b..717a7b1531c78278e606c16f545627651dce2ccd 100644 --- a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt +++ b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt @@ -23,7 +23,7 @@ tree sources. Example 1: An example of a clock controller node is listed below. - clock: clock-controller@0x10010000 { + clock: clock-controller@10010000 { compatible = "samsung,exynos5420-clock"; reg = <0x10010000 0x30000>; #clock-cells = <1>; diff --git a/Documentation/devicetree/bindings/clock/exynos5440-clock.txt b/Documentation/devicetree/bindings/clock/exynos5440-clock.txt index 5f7005f73058f74a1fe308814eb11f75d8100435..c7d227c31e95bd944f351dd977682d3337b4a2e5 100644 --- a/Documentation/devicetree/bindings/clock/exynos5440-clock.txt +++ b/Documentation/devicetree/bindings/clock/exynos5440-clock.txt @@ -21,7 +21,7 @@ tree sources. Example: An example of a clock controller node is listed below. - clock: clock-controller@0x10010000 { + clock: clock-controller@10010000 { compatible = "samsung,exynos5440-clock"; reg = <0x160000 0x10000>; #clock-cells = <1>; diff --git a/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt b/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt index 3e6a81e99804435c003560e9f2145a6a5e6e74c9..c35cb6c4af4d9fd7e681de3c9d6a5bdbe629e034 100644 --- a/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt +++ b/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt @@ -14,7 +14,7 @@ Required properties: Example: -pllctrl: pll-controller@0x02310000 { +pllctrl: pll-controller@02310000 { compatible = "ti,keystone-pllctrl", "syscon"; reg = <0x02310000 0x200>; }; diff --git a/Documentation/devicetree/bindings/clock/zx296702-clk.txt b/Documentation/devicetree/bindings/clock/zx296702-clk.txt index e85ecb510d56daad83ffbbe3572a96dc110a43a3..5c91c9e4f1beb47f35feb64361f3de23bcc35389 100644 --- a/Documentation/devicetree/bindings/clock/zx296702-clk.txt +++ b/Documentation/devicetree/bindings/clock/zx296702-clk.txt @@ -20,13 +20,13 @@ ID in its "clocks" phandle cell. See include/dt-bindings/clock/zx296702-clock.h for the full list of zx296702 clock IDs. -topclk: topcrm@0x09800000 { +topclk: topcrm@09800000 { compatible = "zte,zx296702-topcrm-clk"; reg = <0x09800000 0x1000>; #clock-cells = <1>; }; -uart0: serial@0x09405000 { +uart0: serial@09405000 { compatible = "zte,zx296702-uart"; reg = <0x09405000 0x1000>; interrupts = ; diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt index 7aef0eae58d43cce4a2d94aa6e20fc5c40343ffa..76aec8a3724d62120392be98cd093acd0f4015b5 100644 --- a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt +++ b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt @@ -456,7 +456,7 @@ System ON/OFF key driver Definition: this is phandle to the register map node. EXAMPLE: - snvs-pwrkey@0x020cc000 { + snvs-pwrkey@020cc000 { compatible = "fsl,sec-v4.0-pwrkey"; regmap = <&snvs>; interrupts = <0 4 0x4> @@ -545,7 +545,7 @@ FULL EXAMPLE interrupts = <93 2>; }; - snvs-pwrkey@0x020cc000 { + snvs-pwrkey@020cc000 { compatible = "fsl,sec-v4.0-pwrkey"; regmap = <&sec_mon>; interrupts = <0 4 0x4>; diff --git a/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt b/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt index 001dd63979a974802fe69b6e72b12fe79c7c1c6b..148191b0fc15864725da6907a0571b64337024d6 100644 --- a/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt +++ b/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt @@ -9,7 +9,7 @@ Required properties: - clock-names : the name of clock used by the DFI, must be "pclk_ddr_mon"; Example: - dfi: dfi@0xff630000 { + dfi: dfi@ff630000 { compatible = "rockchip,rk3399-dfi"; reg = <0x00 0xff630000 0x00 0x4000>; rockchip,pmu = <&pmugrf>; diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt index 7f040edc16fe6325e22e1c8f46081c758225c348..bf4a18047309a5f806bff4f8d5bf72459cf347c7 100644 --- a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt +++ b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.txt @@ -48,6 +48,10 @@ Required properties: Documentation/devicetree/bindings/reset/reset.txt, the reset-names should be "hdmitx_apb", "hdmitx", "hdmitx_phy" +Optional properties: +- hdmi-supply: Optional phandle to an external 5V regulator to power the HDMI + logic, as described in the file ../regulator/regulator.txt + Required nodes: The connections to the HDMI ports are modeled using the OF graph diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt index 00f74bad1e957c780fbb6f5b174aa356076e7ca0..057b81335775e7526805710324eb497fe7c04bcb 100644 --- a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt +++ b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.txt @@ -64,6 +64,10 @@ Required properties: - reg-names: should contain the names of the previous memory regions - interrupts: should contain the VENC Vsync interrupt number +Optional properties: +- power-domains: Optional phandle to associated power domain as described in + the file ../power/power_domain.txt + Required nodes: The connections to the VPU output video ports are modeled using the OF graph diff --git a/Documentation/devicetree/bindings/display/atmel,lcdc.txt b/Documentation/devicetree/bindings/display/atmel,lcdc.txt index 1a21202778ee354af7e5fc1f3d9ce65b0a7a964a..acb5a01321279e13d0c0c2780be8018b330152d8 100644 --- a/Documentation/devicetree/bindings/display/atmel,lcdc.txt +++ b/Documentation/devicetree/bindings/display/atmel,lcdc.txt @@ -27,7 +27,7 @@ Optional properties: Example: - fb0: fb@0x00500000 { + fb0: fb@00500000 { compatible = "atmel,at91sam9g45-lcdc"; reg = <0x00500000 0x1000>; interrupts = <23 3 0>; @@ -41,7 +41,7 @@ Example: Example for fixed framebuffer memory: - fb0: fb@0x00500000 { + fb0: fb@00500000 { compatible = "atmel,at91sam9263-lcdc"; reg = <0x00700000 0x1000 0x70000000 0x200000>; [...] diff --git a/Documentation/devicetree/bindings/display/ilitek,ili9225.txt b/Documentation/devicetree/bindings/display/ilitek,ili9225.txt index 21607a541c332057bf7805fe90a52104b26c8473..a59feb52015be1329140529bc9befc5685abee8d 100644 --- a/Documentation/devicetree/bindings/display/ilitek,ili9225.txt +++ b/Documentation/devicetree/bindings/display/ilitek,ili9225.txt @@ -4,7 +4,7 @@ This binding is for display panels using an Ilitek ILI9225 controller in SPI mode. Required properties: -- compatible: "ilitek,ili9225-2.2in-176x220" +- compatible: "vot,v220hf01a-t", "ilitek,ili9225" - rs-gpios: Register select signal - reset-gpios: Reset pin @@ -16,7 +16,7 @@ Optional properties: Example: display@0{ - compatible = "ilitek,ili9225-2.2in-176x220"; + compatible = "vot,v220hf01a-t", "ilitek,ili9225"; reg = <0>; spi-max-frequency = <12000000>; rs-gpios = <&gpio0 9 GPIO_ACTIVE_HIGH>; diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.txt b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.txt new file mode 100644 index 0000000000000000000000000000000000000000..3d5ce6ad6ec71f48d8e67a36014101da708d3f7b --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.txt @@ -0,0 +1,49 @@ +Ilitek ILI9322 TFT panel driver with SPI control bus + +This is a driver for 320x240 TFT panels, accepting a variety of input +streams that get adapted and scaled to the panel. The panel output has +960 TFT source driver pins and 240 TFT gate driver pins, VCOM, VCOML and +VCOMH outputs. + +Required properties: + - compatible: "dlink,dir-685-panel", "ilitek,ili9322" + (full system-specific compatible is always required to look up configuration) + - reg: address of the panel on the SPI bus + +Optional properties: + - vcc-supply: core voltage supply, see regulator/regulator.txt + - iovcc-supply: voltage supply for the interface input/output signals, + see regulator/regulator.txt + - vci-supply: voltage supply for analog parts, see regulator/regulator.txt + - reset-gpios: a GPIO spec for the reset pin, see gpio/gpio.txt + + The following optional properties only apply to RGB and YUV input modes and + can be omitted for BT.656 input modes: + + - pixelclk-active: see display/panel/display-timing.txt + - de-active: see display/panel/display-timing.txt + - hsync-active: see display/panel/display-timing.txt + - vsync-active: see display/panel/display-timing.txt + +The panel must obey the rules for a SPI slave device as specified in +spi/spi-bus.txt + +The device node can contain one 'port' child node with one child +'endpoint' node, according to the bindings defined in +media/video-interfaces.txt. This node should describe panel's video bus. + +Example: + +panel: display@0 { + compatible = "dlink,dir-685-panel", "ilitek,ili9322"; + reg = <0>; + vcc-supply = <&vdisp>; + iovcc-supply = <&vdisp>; + vci-supply = <&vdisp>; + + port { + panel_in: endpoint { + remote-endpoint = <&display_out>; + }; + }; +}; diff --git a/Documentation/devicetree/bindings/display/panel/panel-common.txt b/Documentation/devicetree/bindings/display/panel/panel-common.txt index ec52c472c8459b920643af0592a30c385dd27a8e..557fa765adcb9450c4003555d1211978e73a9703 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-common.txt +++ b/Documentation/devicetree/bindings/display/panel/panel-common.txt @@ -78,6 +78,16 @@ used for panels that implement compatible control signals. while active. Active high reset signals can be supported by inverting the GPIO specifier polarity flag. +Power +----- + +- power-supply: display panels require power to be supplied. While several + panels need more than one power supply with panel-specific constraints + governing the order and timings of the power supplies, in many cases a single + power supply is sufficient, either because the panel has a single power rail, + or because all its power rails can be driven by the same supply. In that case + the power-supply property specifies the supply powering the panel as a phandle + to a regulator. Backlight --------- diff --git a/Documentation/devicetree/bindings/display/panel/panel-lvds.txt b/Documentation/devicetree/bindings/display/panel/panel-lvds.txt index b938269f841e5d6b55d8336a0d1be3ddb3127e45..250850a2150b8c9805997de6cfa153a8ea403aae 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-lvds.txt +++ b/Documentation/devicetree/bindings/display/panel/panel-lvds.txt @@ -32,6 +32,7 @@ Optional properties: - label: See panel-common.txt. - gpios: See panel-common.txt. - backlight: See panel-common.txt. +- power-supply: See panel-common.txt. - data-mirror: If set, reverse the bit order described in the data mappings below on all data lanes, transmitting bits for slots 6 to 0 instead of 0 to 6. diff --git a/Documentation/devicetree/bindings/display/panel/simple-panel.txt b/Documentation/devicetree/bindings/display/panel/simple-panel.txt index 1341bbf4aa3d13147de6465d5219989338b9d3b7..16d8ff088b7d15504afd58984b291ded9a4b7f15 100644 --- a/Documentation/devicetree/bindings/display/panel/simple-panel.txt +++ b/Documentation/devicetree/bindings/display/panel/simple-panel.txt @@ -1,7 +1,7 @@ Simple display panel Required properties: -- power-supply: regulator to provide the supply voltage +- power-supply: See panel-common.txt Optional properties: - ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing diff --git a/Documentation/devicetree/bindings/display/panel/toppoly,td028ttec1.txt b/Documentation/devicetree/bindings/display/panel/tpo,td028ttec1.txt similarity index 84% rename from Documentation/devicetree/bindings/display/panel/toppoly,td028ttec1.txt rename to Documentation/devicetree/bindings/display/panel/tpo,td028ttec1.txt index 7175dc3740acf12ddddd673a13d543eecb9c6440..ed34253d9fb129916f25db38440faf13645721c3 100644 --- a/Documentation/devicetree/bindings/display/panel/toppoly,td028ttec1.txt +++ b/Documentation/devicetree/bindings/display/panel/tpo,td028ttec1.txt @@ -2,7 +2,7 @@ Toppoly TD028TTEC1 Panel ======================== Required properties: -- compatible: "toppoly,td028ttec1" +- compatible: "tpo,td028ttec1" Optional properties: - label: a symbolic name for the panel @@ -14,7 +14,7 @@ Example ------- lcd-panel: td028ttec1@0 { - compatible = "toppoly,td028ttec1"; + compatible = "tpo,td028ttec1"; reg = <0>; spi-max-frequency = <100000>; spi-cpol; diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.txt b/Documentation/devicetree/bindings/display/sitronix,st7735r.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0a5090a3326b86c1df8a42eb65e8884a77cd521 --- /dev/null +++ b/Documentation/devicetree/bindings/display/sitronix,st7735r.txt @@ -0,0 +1,35 @@ +Sitronix ST7735R display panels + +This binding is for display panels using a Sitronix ST7735R controller in SPI +mode. + +Required properties: +- compatible: "jianda,jd-t18003-t01", "sitronix,st7735r" +- dc-gpios: Display data/command selection (D/CX) +- reset-gpios: Reset signal (RSTX) + +The node for this driver must be a child node of a SPI controller, hence +all mandatory properties described in ../spi/spi-bus.txt must be specified. + +Optional properties: +- rotation: panel rotation in degrees counter clockwise (0,90,180,270) +- backlight: phandle of the backlight device attached to the panel + +Example: + + backlight: backlight { + compatible = "gpio-backlight"; + gpios = <&gpio 44 GPIO_ACTIVE_HIGH>; + } + + ... + + display@0{ + compatible = "jianda,jd-t18003-t01", "sitronix,st7735r"; + reg = <0>; + spi-max-frequency = <32000000>; + dc-gpios = <&gpio 43 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio 80 GPIO_ACTIVE_HIGH>; + rotation = <270>; + backlight = &backlight; + }; diff --git a/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt b/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt index 50cc72ee11689ccc99f54252b0c1a442362b9569..cd626ee1147a2c92da8592dbd6a48728b54181e0 100644 --- a/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt +++ b/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt @@ -93,6 +93,7 @@ Required properties: * allwinner,sun6i-a31s-tcon * allwinner,sun7i-a20-tcon * allwinner,sun8i-a33-tcon + * allwinner,sun8i-a83t-tcon-lcd * allwinner,sun8i-v3s-tcon - reg: base address and size of memory-mapped region - interrupts: interrupt associated to this IP @@ -121,6 +122,14 @@ Required properties: On SoCs other than the A33 and V3s, there is one more clock required: - 'tcon-ch1': The clock driving the TCON channel 1 +On SoCs that support LVDS (all SoCs but the A13, H3, H5 and V3s), you +need one more reset line: + - 'lvds': The reset line driving the LVDS logic + +And on the A23, A31, A31s and A33, you need one more clock line: + - 'lvds-alt': An alternative clock source, separate from the TCON channel 0 + clock, that can be used to drive the LVDS clock + DRC --- @@ -216,6 +225,7 @@ supported. Required properties: - compatible: value must be one of: + * allwinner,sun8i-a83t-de2-mixer-0 * allwinner,sun8i-v3s-de2-mixer - reg: base address and size of the memory-mapped region. - clocks: phandles to the clocks feeding the mixer @@ -245,6 +255,7 @@ Required properties: * allwinner,sun6i-a31s-display-engine * allwinner,sun7i-a20-display-engine * allwinner,sun8i-a33-display-engine + * allwinner,sun8i-a83t-display-engine * allwinner,sun8i-v3s-display-engine - allwinner,pipelines: list of phandle to the display engine diff --git a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt index 844e0103fb0dac15220794cfde75f6d58b2c4d73..593be44a53c9527774c35ca9331f9b25e23d97cd 100644 --- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt +++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt @@ -206,21 +206,33 @@ of the following host1x client modules: - "nvidia,tegra132-sor": for Tegra132 - "nvidia,tegra210-sor": for Tegra210 - "nvidia,tegra210-sor1": for Tegra210 + - "nvidia,tegra186-sor": for Tegra186 + - "nvidia,tegra186-sor1": for Tegra186 - reg: Physical base address and length of the controller's registers. - interrupts: The interrupt outputs from the controller. - clocks: Must contain an entry for each entry in clock-names. See ../clocks/clock-bindings.txt for details. - clock-names: Must include the following entries: - sor: clock input for the SOR hardware - - source: source clock for the SOR clock + - out: SOR output clock - parent: input for the pixel clock - dp: reference clock for the SOR clock - safe: safe reference for the SOR clock during power up + + For Tegra186 and later: + - pad: SOR pad output clock (on Tegra186 and later) + + Obsolete: + - source: source clock for the SOR clock (obsolete, use "out" instead) + - resets: Must contain an entry for each entry in reset-names. See ../reset/reset.txt for details. - reset-names: Must include the following entries: - sor + Required properties on Tegra186 and later: + - nvidia,interface: index of the SOR interface + Optional properties: - nvidia,ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection diff --git a/Documentation/devicetree/bindings/display/ti/ti,dra7-dss.txt b/Documentation/devicetree/bindings/display/ti/ti,dra7-dss.txt index c30f9ec189ed612cf1ab79de9e7a4ea4c427f1b8..91279f1060fe1a4f58ae0b4908dee632e98e6b11 100644 --- a/Documentation/devicetree/bindings/display/ti/ti,dra7-dss.txt +++ b/Documentation/devicetree/bindings/display/ti/ti,dra7-dss.txt @@ -47,6 +47,11 @@ Required properties: - clocks: handle to fclk - clock-names: "fck" +Optional properties: +- max-memory-bandwidth: Input memory (from main memory to dispc) bandwidth limit + in bytes per second + + HDMI ---- diff --git a/Documentation/devicetree/bindings/display/ti/ti,omap2-dss.txt b/Documentation/devicetree/bindings/display/ti/ti,omap2-dss.txt index afcd5a86c6a45a8a1d9bac21e566ad93a47f95c2..ee867c4d1152e4a62b41f266b6944b943203e34c 100644 --- a/Documentation/devicetree/bindings/display/ti/ti,omap2-dss.txt +++ b/Documentation/devicetree/bindings/display/ti/ti,omap2-dss.txt @@ -28,6 +28,10 @@ Required properties: - ti,hwmods: "dss_dispc" - interrupts: the DISPC interrupt +Optional properties: +- max-memory-bandwidth: Input memory (from main memory to dispc) bandwidth limit + in bytes per second + RFBI ---- diff --git a/Documentation/devicetree/bindings/display/ti/ti,omap3-dss.txt b/Documentation/devicetree/bindings/display/ti/ti,omap3-dss.txt index dc66e1447c318139b333b4d5338b5090b0fffa5b..cd02516a40b6f0a2974a08d307ba66ec265dfc70 100644 --- a/Documentation/devicetree/bindings/display/ti/ti,omap3-dss.txt +++ b/Documentation/devicetree/bindings/display/ti/ti,omap3-dss.txt @@ -37,6 +37,10 @@ Required properties: - clocks: handle to fclk - clock-names: "fck" +Optional properties: +- max-memory-bandwidth: Input memory (from main memory to dispc) bandwidth limit + in bytes per second + RFBI ---- diff --git a/Documentation/devicetree/bindings/display/ti/ti,omap4-dss.txt b/Documentation/devicetree/bindings/display/ti/ti,omap4-dss.txt index bc624db8888d761e814ff1df5fa4d36a6a23ae13..0f85f6b3a5a8b4609c7c1fe6a056d210dfc25b7c 100644 --- a/Documentation/devicetree/bindings/display/ti/ti,omap4-dss.txt +++ b/Documentation/devicetree/bindings/display/ti/ti,omap4-dss.txt @@ -36,6 +36,10 @@ Required properties: - clocks: handle to fclk - clock-names: "fck" +Optional properties: +- max-memory-bandwidth: Input memory (from main memory to dispc) bandwidth limit + in bytes per second + RFBI ---- diff --git a/Documentation/devicetree/bindings/display/ti/ti,omap5-dss.txt b/Documentation/devicetree/bindings/display/ti/ti,omap5-dss.txt index 118a486c47bb71740511a87e661446a7df299580..20861218649f14412c3b280e533e6426d7e93053 100644 --- a/Documentation/devicetree/bindings/display/ti/ti,omap5-dss.txt +++ b/Documentation/devicetree/bindings/display/ti/ti,omap5-dss.txt @@ -36,6 +36,10 @@ Required properties: - clocks: handle to fclk - clock-names: "fck" +Optional properties: +- max-memory-bandwidth: Input memory (from main memory to dispc) bandwidth limit + in bytes per second + RFBI ---- diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt index 55492c264d1779539ffb838f3e2f020c07e45709..b3408cc57be6d398f2c206f5d599d7134004dd0b 100644 --- a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt +++ b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt @@ -73,7 +73,7 @@ Hypervisor OS configuration: max-read-transactions = <31>; channel-reset-timeout-cycles = <0x500>; - hidma_24: dma-controller@0x5c050000 { + hidma_24: dma-controller@5c050000 { compatible = "qcom,hidma-1.0"; reg = <0 0x5c050000 0x0 0x1000>, <0 0x5c0b0000 0x0 0x1000>; @@ -85,7 +85,7 @@ Hypervisor OS configuration: Guest OS configuration: - hidma_24: dma-controller@0x5c050000 { + hidma_24: dma-controller@5c050000 { compatible = "qcom,hidma-1.0"; reg = <0 0x5c050000 0x0 0x1000>, <0 0x5c0b0000 0x0 0x1000>; diff --git a/Documentation/devicetree/bindings/dma/zxdma.txt b/Documentation/devicetree/bindings/dma/zxdma.txt index abec59f35fde949baf60210017407e45e8cffce6..0ab80f69e566ef7c66001486a7cd5acea5685df8 100644 --- a/Documentation/devicetree/bindings/dma/zxdma.txt +++ b/Documentation/devicetree/bindings/dma/zxdma.txt @@ -13,7 +13,7 @@ Required properties: Example: Controller: - dma: dma-controller@0x09c00000{ + dma: dma-controller@09c00000{ compatible = "zte,zx296702-dma"; reg = <0x09c00000 0x1000>; clocks = <&topclk ZX296702_DMA_ACLK>; diff --git a/Documentation/devicetree/bindings/eeprom/at25.txt b/Documentation/devicetree/bindings/eeprom/at25.txt index 1d3447165c374f673aa9a717f94f2387cfd852f1..e823d90b802f7f8f293f9c0ba4f06533361824bd 100644 --- a/Documentation/devicetree/bindings/eeprom/at25.txt +++ b/Documentation/devicetree/bindings/eeprom/at25.txt @@ -1,7 +1,12 @@ EEPROMs (SPI) compatible with Atmel at25. Required properties: -- compatible : "atmel,at25". +- compatible : Should be ",", and generic value "atmel,at25". + Example "," values: + "microchip,25lc040" + "st,m95m02" + "st,m95256" + - reg : chip select number - spi-max-frequency : max spi frequency to use - pagesize : size of the eeprom page @@ -13,7 +18,7 @@ Optional properties: - spi-cpol : SPI inverse clock polarity, as per spi-bus bindings. - read-only : this parameter-less property disables writes to the eeprom -Obsolete legacy properties are can be used in place of "size", "pagesize", +Obsolete legacy properties can be used in place of "size", "pagesize", "address-width", and "read-only": - at25,byte-len : total eeprom size in bytes - at25,addr-mode : addr-mode flags, as defined in include/linux/spi/eeprom.h @@ -22,8 +27,8 @@ Obsolete legacy properties are can be used in place of "size", "pagesize", Additional compatible properties are also allowed. Example: - at25@0 { - compatible = "atmel,at25", "st,m95256"; + eeprom@0 { + compatible = "st,m95256", "atmel,at25"; reg = <0> spi-max-frequency = <5000000>; spi-cpha; diff --git a/Documentation/devicetree/bindings/gpio/gpio-altera.txt b/Documentation/devicetree/bindings/gpio/gpio-altera.txt index 826a7208ca93a8f5cd31f4a94c96b36ce6c7dab1..146e554b3c6769cfe10238acc1e9b48d9a5e3d28 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-altera.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-altera.txt @@ -30,7 +30,7 @@ Optional properties: Example: -gpio_altr: gpio@0xff200000 { +gpio_altr: gpio@ff200000 { compatible = "altr,pio-1.0"; reg = <0xff200000 0x10>; interrupts = <0 45 4>; diff --git a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt index 7f57271df2bc96b41168d337c911511ccb726e33..0d0158728f897bd9fbeaa41e884955445ca1f062 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt @@ -27,7 +27,7 @@ Required properties: ti,tca6424 ti,tca9539 ti,tca9554 - onsemi,pca9654 + onnn,pca9654 exar,xra1202 Optional properties: diff --git a/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt b/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt index 231e4cc4008cbcf0783c6ab438fa3eca1815c4fb..d4a082acf92f0f7b98cdd0702c47de3bbed30725 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt @@ -18,7 +18,7 @@ Optional properties: Example / { - i2c4: i2c4@0x10054000 { + i2c4: i2c4@10054000 { compatible = "ingenic,jz4780-i2c"; reg = <0x10054000 0x1000>; diff --git a/Documentation/devicetree/bindings/iio/pressure/hp03.txt b/Documentation/devicetree/bindings/iio/pressure/hp03.txt index 54e7e70bcea52da324caf4f6c277573cca91f823..831dbee7a5c35abee4e2d64e5c2c30de653fbe7a 100644 --- a/Documentation/devicetree/bindings/iio/pressure/hp03.txt +++ b/Documentation/devicetree/bindings/iio/pressure/hp03.txt @@ -10,7 +10,7 @@ Required properties: Example: -hp03@0x77 { +hp03@77 { compatible = "hoperf,hp03"; reg = <0x77>; xclr-gpio = <&portc 0 0x0>; diff --git a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt index ca5a2c86480cff1ec8f68168a9f7c94c1a29ef01..56d835242af2d834f440e3d149011b38e5a7a65f 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt +++ b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt @@ -15,7 +15,7 @@ Optional properties: Example: i2c@80110000 { - bu21013_tp@0x5c { + bu21013_tp@5c { compatible = "rohm,bu21013_tp"; reg = <0x5c>; touch-gpio = <&gpio2 20 0x4>; diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt index 560d8a727b8f84e951b227d8fcc838579ddc94b1..2f324464864658a80a13b1cdab0395282f280b8e 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic.txt @@ -155,7 +155,7 @@ Example: <0x0 0xe112f000 0 0x02000>, <0x0 0xe1140000 0 0x10000>, <0x0 0xe1160000 0 0x10000>; - v2m0: v2m@0x8000 { + v2m0: v2m@8000 { compatible = "arm,gic-v2m-frame"; msi-controller; reg = <0x0 0x80000 0 0x1000>; @@ -163,7 +163,7 @@ Example: .... - v2mN: v2m@0x9000 { + v2mN: v2m@9000 { compatible = "arm,gic-v2m-frame"; msi-controller; reg = <0x0 0x90000 0 0x1000>; diff --git a/Documentation/devicetree/bindings/interrupt-controller/img,meta-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/img,meta-intc.txt index 80994adab3928905de7c44bbc725a1f6f345ca74..42431f44697fb22cee723d176ef4ce9d1cdc4ebd 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/img,meta-intc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/img,meta-intc.txt @@ -71,7 +71,7 @@ Example 2: * An interrupt generating device that is wired to a Meta external * trigger block. */ - uart1: uart@0x02004c00 { + uart1: uart@02004c00 { // Interrupt source '5' that is level-sensitive. // Note that there are only two cells as specified in the // interrupt parent's '#interrupt-cells' property. diff --git a/Documentation/devicetree/bindings/interrupt-controller/img,pdc-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/img,pdc-intc.txt index a691185503441ec2ed37e292c8412af12a116f8e..5dc2a55ad81143d86d8e781f147342ad6a21848e 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/img,pdc-intc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/img,pdc-intc.txt @@ -51,7 +51,7 @@ Example 1: /* * TZ1090 PDC block */ - pdc: pdc@0x02006000 { + pdc: pdc@02006000 { // This is an interrupt controller node. interrupt-controller; diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,spear3xx-shirq.txt b/Documentation/devicetree/bindings/interrupt-controller/st,spear3xx-shirq.txt index 715a013ed4bdef1ada6217e5f5540df142d385ce..2ab0ea39867b516dd9462e8b71142ddf8d8a5f8d 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/st,spear3xx-shirq.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/st,spear3xx-shirq.txt @@ -39,7 +39,7 @@ Example: The following is an example from the SPEAr320 SoC dtsi file. -shirq: interrupt-controller@0xb3000000 { +shirq: interrupt-controller@b3000000 { compatible = "st,spear320-shirq"; reg = <0xb3000000 0x1000>; interrupts = <28 29 30 1>; diff --git a/Documentation/devicetree/bindings/mailbox/altera-mailbox.txt b/Documentation/devicetree/bindings/mailbox/altera-mailbox.txt index c2619797ce0c92f06b41117940209f4c9e37ca1c..49cfc8c337c4644a5b7cd271660fe88b5ec28412 100644 --- a/Documentation/devicetree/bindings/mailbox/altera-mailbox.txt +++ b/Documentation/devicetree/bindings/mailbox/altera-mailbox.txt @@ -14,7 +14,7 @@ Optional properties: depends on the interrupt controller parent. Example: - mbox_tx: mailbox@0x100 { + mbox_tx: mailbox@100 { compatible = "altr,mailbox-1.0"; reg = <0x100 0x8>; interrupt-parent = < &gic_0 >; @@ -22,7 +22,7 @@ Example: #mbox-cells = <1>; }; - mbox_rx: mailbox@0x200 { + mbox_rx: mailbox@200 { compatible = "altr,mailbox-1.0"; reg = <0x200 0x8>; interrupt-parent = < &gic_0 >; @@ -40,7 +40,7 @@ support only one channel).The equivalent "mbox-names" property value can be used to give a name to the communication channel to be used by the client user. Example: - mclient0: mclient0@0x400 { + mclient0: mclient0@400 { compatible = "client-1.0"; reg = <0x400 0x10>; mbox-names = "mbox-tx", "mbox-rx"; diff --git a/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt b/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt index 0f3ee81d92c297022adcdbb1f9689f4b924ea143..9bcdf2087625c108a8180060f3dd9ebb4c111a46 100644 --- a/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt +++ b/Documentation/devicetree/bindings/mailbox/brcm,iproc-pdc-mbox.txt @@ -15,7 +15,7 @@ Optional properties: - brcm,use-bcm-hdr: present if a BCM header precedes each frame. Example: - pdc0: iproc-pdc0@0x612c0000 { + pdc0: iproc-pdc0@612c0000 { compatible = "brcm,iproc-pdc-mbox"; reg = <0 0x612c0000 0 0x445>; /* PDC FS0 regs */ interrupts = ; diff --git a/Documentation/devicetree/bindings/media/exynos5-gsc.txt b/Documentation/devicetree/bindings/media/exynos5-gsc.txt index 0d4fdaedc6f1e82aec78f8080b6c312104b3cb0e..bc963a6d305a8bbc0e0ca2a231eea89d3247087c 100644 --- a/Documentation/devicetree/bindings/media/exynos5-gsc.txt +++ b/Documentation/devicetree/bindings/media/exynos5-gsc.txt @@ -17,7 +17,7 @@ Optional properties: Example: -gsc_0: gsc@0x13e00000 { +gsc_0: gsc@13e00000 { compatible = "samsung,exynos5250-gsc"; reg = <0x13e00000 0x1000>; interrupts = <0 85 0>; diff --git a/Documentation/devicetree/bindings/media/mediatek-vcodec.txt b/Documentation/devicetree/bindings/media/mediatek-vcodec.txt index 46c15c54175d87cca499ceb81c33fdc6560df5b0..2a615d84a682563d28499b9a1f26a5ca2abcd984 100644 --- a/Documentation/devicetree/bindings/media/mediatek-vcodec.txt +++ b/Documentation/devicetree/bindings/media/mediatek-vcodec.txt @@ -68,7 +68,7 @@ vcodec_dec: vcodec@16000000 { "vdec_bus_clk_src"; }; - vcodec_enc: vcodec@0x18002000 { + vcodec_enc: vcodec@18002000 { compatible = "mediatek,mt8173-vcodec-enc"; reg = <0 0x18002000 0 0x1000>, /*VENC_SYS*/ <0 0x19002000 0 0x1000>; /*VENC_LT_SYS*/ diff --git a/Documentation/devicetree/bindings/media/rcar_vin.txt b/Documentation/devicetree/bindings/media/rcar_vin.txt index 6e4ef8caf759e5d31179c00120eccec349f3f8cd..19357d0bbe6539b3fbb7c73a4adbe4509fc905de 100644 --- a/Documentation/devicetree/bindings/media/rcar_vin.txt +++ b/Documentation/devicetree/bindings/media/rcar_vin.txt @@ -44,7 +44,7 @@ Device node example vin0 = &vin0; }; - vin0: vin@0xe6ef0000 { + vin0: vin@e6ef0000 { compatible = "renesas,vin-r8a7790", "renesas,rcar-gen2-vin"; clocks = <&mstp8_clks R8A7790_CLK_VIN0>; reg = <0 0xe6ef0000 0 0x1000>; diff --git a/Documentation/devicetree/bindings/media/samsung-fimc.txt b/Documentation/devicetree/bindings/media/samsung-fimc.txt index e4e15d8d752157909e03344ee04ce7621ba5b967..48c599dacbdf7baafbc2cc3a64c170a44ba95313 100644 --- a/Documentation/devicetree/bindings/media/samsung-fimc.txt +++ b/Documentation/devicetree/bindings/media/samsung-fimc.txt @@ -138,7 +138,7 @@ Example: }; /* MIPI CSI-2 bus IF sensor */ - s5c73m3: sensor@0x1a { + s5c73m3: sensor@1a { compatible = "samsung,s5c73m3"; reg = <0x1a>; vddio-supply = <...>; diff --git a/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt b/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt index 1ce4e46bcbb768a91ba16e27f19fa415769ee802..17a8e81ca0cc0ecd3749bc65a32abd7a4ce58bfb 100644 --- a/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt +++ b/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt @@ -8,7 +8,7 @@ Bindings, specific for the sh_mobile_ceu_camera.c driver: Example: -ceu0: ceu@0xfe910000 { +ceu0: ceu@fe910000 { compatible = "renesas,sh-mobile-ceu"; reg = <0xfe910000 0xa0>; interrupt-parent = <&intcs>; diff --git a/Documentation/devicetree/bindings/media/video-interfaces.txt b/Documentation/devicetree/bindings/media/video-interfaces.txt index 3994b0143dd1ef37c9ce28462f686e51ccdadc4e..258b8dfddf48c1e218b5e3e1a8a41b70b4baeeec 100644 --- a/Documentation/devicetree/bindings/media/video-interfaces.txt +++ b/Documentation/devicetree/bindings/media/video-interfaces.txt @@ -154,7 +154,7 @@ imx074 is linked to ceu0 through the MIPI CSI-2 receiver (csi2). ceu0 has a 'port' node which may indicate that at any time only one of the following data pipelines can be active: ov772x -> ceu0 or imx074 -> csi2 -> ceu0. - ceu0: ceu@0xfe910000 { + ceu0: ceu@fe910000 { compatible = "renesas,sh-mobile-ceu"; reg = <0xfe910000 0xa0>; interrupts = <0x880>; @@ -193,9 +193,9 @@ pipelines can be active: ov772x -> ceu0 or imx074 -> csi2 -> ceu0. }; }; - i2c0: i2c@0xfff20000 { + i2c0: i2c@fff20000 { ... - ov772x_1: camera@0x21 { + ov772x_1: camera@21 { compatible = "ovti,ov772x"; reg = <0x21>; vddio-supply = <®ulator1>; @@ -219,7 +219,7 @@ pipelines can be active: ov772x -> ceu0 or imx074 -> csi2 -> ceu0. }; }; - imx074: camera@0x1a { + imx074: camera@1a { compatible = "sony,imx074"; reg = <0x1a>; vddio-supply = <®ulator1>; @@ -239,7 +239,7 @@ pipelines can be active: ov772x -> ceu0 or imx074 -> csi2 -> ceu0. }; }; - csi2: csi2@0xffc90000 { + csi2: csi2@ffc90000 { compatible = "renesas,sh-mobile-csi2"; reg = <0xffc90000 0x1000>; interrupts = <0x17a0>; diff --git a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt index fd823d6091b29514e0db29a3b73b1c47edbf064f..152eeccbde1ccd7f3e7686867404733c3f0f8a29 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt +++ b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt @@ -46,7 +46,7 @@ Optional properties: Example: -emif1: emif@0x4c000000 { +emif1: emif@4c000000 { compatible = "ti,emif-4d"; ti,hwmods = "emif2"; phy-type = <1>; diff --git a/Documentation/devicetree/bindings/mfd/ti-keystone-devctrl.txt b/Documentation/devicetree/bindings/mfd/ti-keystone-devctrl.txt index 20963c76b4bcbd30d8d5081ddab600d1c78ef39a..71a1f5963936b855d2ec7fc195e754cf5ad6ec02 100644 --- a/Documentation/devicetree/bindings/mfd/ti-keystone-devctrl.txt +++ b/Documentation/devicetree/bindings/mfd/ti-keystone-devctrl.txt @@ -13,7 +13,7 @@ Required properties: Example: -devctrl: device-state-control@0x02620000 { +devctrl: device-state-control@02620000 { compatible = "ti,keystone-devctrl", "syscon"; reg = <0x02620000 0x1000>; }; diff --git a/Documentation/devicetree/bindings/misc/brcm,kona-smc.txt b/Documentation/devicetree/bindings/misc/brcm,kona-smc.txt index 6c9f176f35717acf1a2fd3dcd7ca2cee86daeb88..05b47232ed9ed647a97df99e7b4c175352748fb6 100644 --- a/Documentation/devicetree/bindings/misc/brcm,kona-smc.txt +++ b/Documentation/devicetree/bindings/misc/brcm,kona-smc.txt @@ -9,7 +9,7 @@ Required properties: - reg : Location and size of bounce buffer Example: - smc@0x3404c000 { + smc@3404c000 { compatible = "brcm,bcm11351-smc", "brcm,kona-smc"; reg = <0x3404c000 0x400>; //1 KiB in SRAM }; diff --git a/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt b/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt index aaba2483b4ff8c79f34030b28b3f546c03084032..7f5dd83f5bd95afd1b696e186681caae75c268b1 100644 --- a/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt +++ b/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt @@ -12,7 +12,7 @@ Refer to clocks/clock-bindings.txt for generic clock consumer properties. Example: -sdio2: sdio@0x3f1a0000 { +sdio2: sdio@3f1a0000 { compatible = "brcm,kona-sdhci"; reg = <0x3f1a0000 0x10000>; clocks = <&sdio3_clk>; diff --git a/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt b/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt index 954561d09a8e6acca1cdfcd433c07da29027c1ca..fa90d253dc7ea00f79de2b7cb99477a87f1682b7 100644 --- a/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt +++ b/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt @@ -24,7 +24,7 @@ Optional properties: Example: -sdhci0: sdhci@0x18041000 { +sdhci0: sdhci@18041000 { compatible = "brcm,sdhci-iproc-cygnus"; reg = <0x18041000 0x100>; interrupts = ; diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt index 3a4ac401e6f93a9d8ce3becd4d75410f95f1e50d..19f5508a75696b722624c550700ee74f72b2362f 100644 --- a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt +++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt @@ -55,7 +55,7 @@ Examples: [hwmod populated DMA resources] - mmc1: mmc@0x4809c000 { + mmc1: mmc@4809c000 { compatible = "ti,omap4-hsmmc"; reg = <0x4809c000 0x400>; ti,hwmods = "mmc1"; @@ -67,7 +67,7 @@ Examples: [generic DMA request binding] - mmc1: mmc@0x4809c000 { + mmc1: mmc@4809c000 { compatible = "ti,omap4-hsmmc"; reg = <0x4809c000 0x400>; ti,hwmods = "mmc1"; diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nor.txt b/Documentation/devicetree/bindings/mtd/gpmc-nor.txt index 131d3a74d0bd453f48c3f31e5487db4e3c71c6e1..c8567b40fe13a02abaee0ffd2f037d0a4282a950 100644 --- a/Documentation/devicetree/bindings/mtd/gpmc-nor.txt +++ b/Documentation/devicetree/bindings/mtd/gpmc-nor.txt @@ -82,15 +82,15 @@ gpmc: gpmc@6e000000 { label = "bootloader-nor"; reg = <0 0x40000>; }; - partition@0x40000 { + partition@40000 { label = "params-nor"; reg = <0x40000 0x40000>; }; - partition@0x80000 { + partition@80000 { label = "kernel-nor"; reg = <0x80000 0x200000>; }; - partition@0x280000 { + partition@280000 { label = "filesystem-nor"; reg = <0x240000 0x7d80000>; }; diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt index 376fa2f50e6bc9b41052928037acd4b3a382d380..956bb046e599d576e3f881b2901e0d369a3c9802 100644 --- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt +++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt @@ -13,7 +13,6 @@ Required properties: at25df321a at25df641 at26df081a - en25s64 mr25h128 mr25h256 mr25h10 @@ -33,7 +32,6 @@ Required properties: s25fl008k s25fl064k sst25vf040b - sst25wf040b m25p40 m25p80 m25p16 diff --git a/Documentation/devicetree/bindings/mtd/mtk-nand.txt b/Documentation/devicetree/bindings/mtd/mtk-nand.txt index dbf9e054c11c0f3a68ba2783a042ca25e12d6b78..0431841de781334ce38aab9877ccd7a8d3698079 100644 --- a/Documentation/devicetree/bindings/mtd/mtk-nand.txt +++ b/Documentation/devicetree/bindings/mtd/mtk-nand.txt @@ -131,7 +131,7 @@ Example: read-only; reg = <0x00000000 0x00400000>; }; - android@0x00400000 { + android@00400000 { label = "android"; reg = <0x00400000 0x12c00000>; }; diff --git a/Documentation/devicetree/bindings/net/altera_tse.txt b/Documentation/devicetree/bindings/net/altera_tse.txt index a706297998e9b48a5eb8924b0e622ae807532757..0e21df94a53ffa221e6262f4323dc2ff60068a00 100644 --- a/Documentation/devicetree/bindings/net/altera_tse.txt +++ b/Documentation/devicetree/bindings/net/altera_tse.txt @@ -52,7 +52,7 @@ Optional properties: Example: - tse_sub_0_eth_tse_0: ethernet@0x1,00000000 { + tse_sub_0_eth_tse_0: ethernet@1,00000000 { compatible = "altr,tse-msgdma-1.0"; reg = <0x00000001 0x00000000 0x00000400>, <0x00000001 0x00000460 0x00000020>, @@ -90,7 +90,7 @@ Example: }; }; - tse_sub_1_eth_tse_0: ethernet@0x1,00001000 { + tse_sub_1_eth_tse_0: ethernet@1,00001000 { compatible = "altr,tse-msgdma-1.0"; reg = <0x00000001 0x00001000 0x00000400>, <0x00000001 0x00001460 0x00000020>, diff --git a/Documentation/devicetree/bindings/net/mdio.txt b/Documentation/devicetree/bindings/net/mdio.txt index 96a53f89aa6e2fa7f9a0d4c6c3b18d03a3b75994..e3e1603f256c1a55ce956a403fce855e7bf58e0e 100644 --- a/Documentation/devicetree/bindings/net/mdio.txt +++ b/Documentation/devicetree/bindings/net/mdio.txt @@ -18,7 +18,7 @@ Example : This example shows these optional properties, plus other properties required for the TI Davinci MDIO driver. - davinci_mdio: ethernet@0x5c030000 { + davinci_mdio: ethernet@5c030000 { compatible = "ti,davinci_mdio"; reg = <0x5c030000 0x1000>; #address-cells = <1>; diff --git a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt index b30d04b54ee94f93910421caca3badda8fcae01e..17d6819669c8ccee0186f4dc797be638a1828097 100644 --- a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt +++ b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt @@ -28,7 +28,7 @@ Required properties: Example: -gmii_to_sgmii_converter: phy@0x100000240 { +gmii_to_sgmii_converter: phy@100000240 { compatible = "altr,gmii-to-sgmii-2.0"; reg = <0x00000001 0x00000240 0x00000008>, <0x00000001 0x00000200 0x00000040>; diff --git a/Documentation/devicetree/bindings/nios2/nios2.txt b/Documentation/devicetree/bindings/nios2/nios2.txt index d6d0a94cb3bbba6634a1c365df92670c84499a94..b95e831bcba3f9824bebf3ce75ee11c2a0b4f203 100644 --- a/Documentation/devicetree/bindings/nios2/nios2.txt +++ b/Documentation/devicetree/bindings/nios2/nios2.txt @@ -36,7 +36,7 @@ Optional properties: Example: -cpu@0x0 { +cpu@0 { device_type = "cpu"; compatible = "altr,nios2-1.0"; reg = <0>; diff --git a/Documentation/devicetree/bindings/pci/altera-pcie.txt b/Documentation/devicetree/bindings/pci/altera-pcie.txt index 495880193adc8c9e9403b5fda893df580718630c..a1dc9366a8fcac6683c99d4d1aeb961b836d44cc 100644 --- a/Documentation/devicetree/bindings/pci/altera-pcie.txt +++ b/Documentation/devicetree/bindings/pci/altera-pcie.txt @@ -25,7 +25,7 @@ Optional properties: - bus-range: PCI bus numbers covered Example - pcie_0: pcie@0xc00000000 { + pcie_0: pcie@c00000000 { compatible = "altr,pcie-root-port-1.0"; reg = <0xc0000000 0x20000000>, <0xff220000 0x00004000>; diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt index 7b1e48bf172b74ba51755409cf8510dc0a15fb6d..149d8f7f86b06ed943b7c0c1331d777a593db55f 100644 --- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt +++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt @@ -52,7 +52,7 @@ Additional required properties for imx7d-pcie: Example: - pcie@0x01000000 { + pcie@01000000 { compatible = "fsl,imx6q-pcie", "snps,dw-pcie"; reg = <0x01ffc000 0x04000>, <0x01f00000 0x80000>; diff --git a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt index bdb7ab39d2d7effafede0803adef4eeef1d9bf9a..7bf9df047a1ee4992ee7f8b9f8e3f742b26bc8a0 100644 --- a/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt +++ b/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt @@ -21,7 +21,7 @@ Optional properties: - dma-coherent: Present if DMA operations are coherent. Hip05 Example (note that Hip06 is the same except compatible): - pcie@0xb0080000 { + pcie@b0080000 { compatible = "hisilicon,hip05-pcie", "snps,dw-pcie"; reg = <0 0xb0080000 0 0x10000>, <0x220 0x00000000 0 0x2000>; reg-names = "rc_dbi", "config"; diff --git a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt index cbc7847dbf6c59ffeb845f95682d9b9a427d81b8..c1ce5a0a652ecbbbb9e61b251b6d41eaad52433f 100644 --- a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt +++ b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt @@ -45,7 +45,7 @@ Optional properties: - usb3_vbus-supply : regulator phandle for controller usb3 vbus Example: - usbphy: phy@0x01c13400 { + usbphy: phy@01c13400 { #phy-cells = <1>; compatible = "allwinner,sun4i-a10-usb-phy"; /* phy base regs, phy1 pmu reg, phy2 pmu reg */ diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-pinmux.txt index 3600d5c6c4d7d5372f405fbd4c146e41dfa24071..3914529a3214b61b6dbc8c2fa6e49cd90bc8d6a5 100644 --- a/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-pinmux.txt +++ b/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-pinmux.txt @@ -25,7 +25,7 @@ Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt For example: - pinmux: pinmux@0x0301d0c8 { + pinmux: pinmux@0301d0c8 { compatible = "brcm,cygnus-pinmux"; reg = <0x0301d0c8 0x1b0>; diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-atlas7.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-atlas7.txt index eecf028ff4854bbe994b6232a7f88890e759d3cf..bf9b07016c8730f29a199e100065d19e90e9200a 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-atlas7.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-atlas7.txt @@ -96,14 +96,14 @@ For example, pinctrl might have subnodes like the following: For a specific board, if it wants to use sd1, it can add the following to its board-specific .dts file. -sd1: sd@0x12340000 { +sd1: sd@12340000 { pinctrl-names = "default"; pinctrl-0 = <&sd1_pmx0>; } or -sd1: sd@0x12340000 { +sd1: sd@12340000 { pinctrl-names = "default"; pinctrl-0 = <&sd1_pmx1>; } diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt index 5f55be59d914a33aa71f4f432237782fd45f46ad..f8420520e14bf6d48467585c8260455f12da1540 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt @@ -41,7 +41,7 @@ For example, pinctrl might have subnodes like the following: For a specific board, if it wants to use uart2 without hardware flow control, it can add the following to its board-specific .dts file. -uart2: uart@0xb0070000 { +uart2: uart@b0070000 { pinctrl-names = "default"; pinctrl-0 = <&uart2_noflow_pins_a>; } diff --git a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt index 4864e3a74de311ff02e6f3c54a04ae2fbba5ae2d..a01a3b8a23632a8707ff9e528c6c010500d8da9a 100644 --- a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt @@ -136,7 +136,7 @@ Example for rk3188: #size-cells = <1>; ranges; - gpio0: gpio0@0x2000a000 { + gpio0: gpio0@2000a000 { compatible = "rockchip,rk3188-gpio-bank0"; reg = <0x2000a000 0x100>; interrupts = ; @@ -149,7 +149,7 @@ Example for rk3188: #interrupt-cells = <2>; }; - gpio1: gpio1@0x2003c000 { + gpio1: gpio1@2003c000 { compatible = "rockchip,gpio-bank"; reg = <0x2003c000 0x100>; interrupts = ; diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt index 378f6dc8b8bd102b64cbf477aa203c41115d9b00..3cbf56ce66ea9871ad2e41068baaef6171ae7ca3 100644 --- a/Documentation/devicetree/bindings/regulator/regulator.txt +++ b/Documentation/devicetree/bindings/regulator/regulator.txt @@ -107,7 +107,7 @@ regulators (twl_reg1 and twl_reg2), ... }; - mmc: mmc@0x0 { + mmc: mmc@0 { ... ... vmmc-supply = <&twl_reg1>; diff --git a/Documentation/devicetree/bindings/serial/efm32-uart.txt b/Documentation/devicetree/bindings/serial/efm32-uart.txt index 8adbab268ca3997f50c134081d2f84409d8d10e3..4f8d8fde0c1c2a0728ac3c331d694e7a1bdbd423 100644 --- a/Documentation/devicetree/bindings/serial/efm32-uart.txt +++ b/Documentation/devicetree/bindings/serial/efm32-uart.txt @@ -12,7 +12,7 @@ Optional properties: Example: -uart@0x4000c400 { +uart@4000c400 { compatible = "energymicro,efm32-uart"; reg = <0x4000c400 0x400>; interrupts = <15>; diff --git a/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt b/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt index f311472990a7e5f081dad3cdba160e2d5edc7abc..75996b6111bb7d95f5f6effb28d27679fc1a5400 100644 --- a/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt +++ b/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt @@ -14,7 +14,7 @@ Required properties: Example: - ps20: ps2@0x01c2a000 { + ps20: ps2@01c2a000 { compatible = "allwinner,sun4i-a10-ps2"; reg = <0x01c2a000 0x400>; interrupts = <0 62 4>; diff --git a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt index 64c66a5644e7551c2637b4aebff3d81f12cdf48f..77cd42cc5f54897b60371af222d62f847e97326f 100644 --- a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt +++ b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt @@ -220,7 +220,7 @@ qmss: qmss@2a40000 { #address-cells = <1>; #size-cells = <1>; ranges; - pdsp0@0x2a10000 { + pdsp0@2a10000 { reg = <0x2a10000 0x1000>, <0x2a0f000 0x100>, <0x2a0c000 0x3c8>, diff --git a/Documentation/devicetree/bindings/sound/adi,axi-i2s.txt b/Documentation/devicetree/bindings/sound/adi,axi-i2s.txt index 5875ca459ed11355feccc4c106e04e5df2a71ce3..4248b662deff04e4fbbbc3effa487e09fa140bee 100644 --- a/Documentation/devicetree/bindings/sound/adi,axi-i2s.txt +++ b/Documentation/devicetree/bindings/sound/adi,axi-i2s.txt @@ -21,7 +21,7 @@ please check: Example: - i2s: i2s@0x77600000 { + i2s: i2s@77600000 { compatible = "adi,axi-i2s-1.00.a"; reg = <0x77600000 0x1000>; clocks = <&clk 15>, <&audio_clock>; diff --git a/Documentation/devicetree/bindings/sound/adi,axi-spdif-tx.txt b/Documentation/devicetree/bindings/sound/adi,axi-spdif-tx.txt index 4eb7997674a09006dfa0a991b634082ddc64974d..7b664e7cb4ae99dfae7f1beb52bba140f6ee1dd7 100644 --- a/Documentation/devicetree/bindings/sound/adi,axi-spdif-tx.txt +++ b/Documentation/devicetree/bindings/sound/adi,axi-spdif-tx.txt @@ -20,7 +20,7 @@ please check: Example: - spdif: spdif@0x77400000 { + spdif: spdif@77400000 { compatible = "adi,axi-spdif-tx-1.00.a"; reg = <0x77600000 0x1000>; clocks = <&clk 15>, <&audio_clock>; diff --git a/Documentation/devicetree/bindings/sound/ak4613.txt b/Documentation/devicetree/bindings/sound/ak4613.txt index 1783f9ef093096a0f06c1daf8ba854bba91e397d..49a2e74fd9cb1489b145b1132f2f892209c8b448 100644 --- a/Documentation/devicetree/bindings/sound/ak4613.txt +++ b/Documentation/devicetree/bindings/sound/ak4613.txt @@ -20,7 +20,7 @@ Optional properties: Example: &i2c { - ak4613: ak4613@0x10 { + ak4613: ak4613@10 { compatible = "asahi-kasei,ak4613"; reg = <0x10>; }; diff --git a/Documentation/devicetree/bindings/sound/ak4642.txt b/Documentation/devicetree/bindings/sound/ak4642.txt index 340784db6808809eecf4ae0e310023550713e2c6..58e48ee9717547f2766a2c0a89fec19536431634 100644 --- a/Documentation/devicetree/bindings/sound/ak4642.txt +++ b/Documentation/devicetree/bindings/sound/ak4642.txt @@ -17,7 +17,7 @@ Optional properties: Example 1: &i2c { - ak4648: ak4648@0x12 { + ak4648: ak4648@12 { compatible = "asahi-kasei,ak4642"; reg = <0x12>; }; diff --git a/Documentation/devicetree/bindings/sound/da7218.txt b/Documentation/devicetree/bindings/sound/da7218.txt index 5ca5a709b6aa1989901eff6b9239f0bc71d0d272..3ab9dfef38d113523549e66cee20ad8db6e56842 100644 --- a/Documentation/devicetree/bindings/sound/da7218.txt +++ b/Documentation/devicetree/bindings/sound/da7218.txt @@ -73,7 +73,7 @@ Example: compatible = "dlg,da7218"; reg = <0x1a>; interrupt-parent = <&gpio6>; - interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; wakeup-source; VDD-supply = <®_audio>; diff --git a/Documentation/devicetree/bindings/sound/da7219.txt b/Documentation/devicetree/bindings/sound/da7219.txt index cf61681826b675ad984dc17f6c1dbcc2e1b6fc35..5b54d2d045c355808bf0f58afba9ec7539ff4472 100644 --- a/Documentation/devicetree/bindings/sound/da7219.txt +++ b/Documentation/devicetree/bindings/sound/da7219.txt @@ -77,7 +77,7 @@ Example: reg = <0x1a>; interrupt-parent = <&gpio6>; - interrupts = <11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <11 IRQ_TYPE_LEVEL_LOW>; VDD-supply = <®_audio>; VDDMIC-supply = <®_audio>; diff --git a/Documentation/devicetree/bindings/sound/max98371.txt b/Documentation/devicetree/bindings/sound/max98371.txt index 6c285235e64be59e0cf55bfee5b54920a58dea36..8b2b2704b574e248c426565514076e2a42aa0631 100644 --- a/Documentation/devicetree/bindings/sound/max98371.txt +++ b/Documentation/devicetree/bindings/sound/max98371.txt @@ -10,7 +10,7 @@ Required properties: Example: &i2c { - max98371: max98371@0x31 { + max98371: max98371@31 { compatible = "maxim,max98371"; reg = <0x31>; }; diff --git a/Documentation/devicetree/bindings/sound/max9867.txt b/Documentation/devicetree/bindings/sound/max9867.txt index 394cd4eb17ec47531edabc161f93506925775805..b8bd914ee697b1435f0ebb47e4d3062961c61859 100644 --- a/Documentation/devicetree/bindings/sound/max9867.txt +++ b/Documentation/devicetree/bindings/sound/max9867.txt @@ -10,7 +10,7 @@ Required properties: Example: &i2c { - max9867: max9867@0x18 { + max9867: max9867@18 { compatible = "maxim,max9867"; reg = <0x18>; }; diff --git a/Documentation/devicetree/bindings/sound/renesas,fsi.txt b/Documentation/devicetree/bindings/sound/renesas,fsi.txt index 0d0ab51105b01ece9c0dfbe6571e6d2086550f42..0cf0f819b8236e250fd0f0db1bc0dba879e9dcd4 100644 --- a/Documentation/devicetree/bindings/sound/renesas,fsi.txt +++ b/Documentation/devicetree/bindings/sound/renesas,fsi.txt @@ -20,7 +20,7 @@ Required properties: Example: -sh_fsi2: sh_fsi2@0xec230000 { +sh_fsi2: sh_fsi2@ec230000 { compatible = "renesas,sh_fsi2"; reg = <0xec230000 0x400>; interrupts = <0 146 0x4>; diff --git a/Documentation/devicetree/bindings/sound/rockchip-spdif.txt b/Documentation/devicetree/bindings/sound/rockchip-spdif.txt index 0a1dc4e1815cb524faee4c228a14ddeeea626578..ec20c1271e929c17e8c24e736989cdbbd4ec9ce9 100644 --- a/Documentation/devicetree/bindings/sound/rockchip-spdif.txt +++ b/Documentation/devicetree/bindings/sound/rockchip-spdif.txt @@ -33,7 +33,7 @@ Required properties on RK3288: Example for the rk3188 SPDIF controller: -spdif: spdif@0x1011e000 { +spdif: spdif@1011e000 { compatible = "rockchip,rk3188-spdif", "rockchip,rk3066-spdif"; reg = <0x1011e000 0x2000>; interrupts = ; diff --git a/Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt b/Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt index 40068ec0e9a561db3128fe37d581b1a4499b5ddf..9c1ee52fed5bff6300b5863b0c3fc07aeceb5dc8 100644 --- a/Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt +++ b/Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt @@ -51,7 +51,7 @@ Optional properties: Example: - sti_uni_player1: sti-uni-player@0x8D81000 { + sti_uni_player1: sti-uni-player@8D81000 { compatible = "st,stih407-uni-player-hdmi"; #sound-dai-cells = <0>; st,syscfg = <&syscfg_core>; @@ -63,7 +63,7 @@ Example: st,tdm-mode = <1>; }; - sti_uni_player2: sti-uni-player@0x8D82000 { + sti_uni_player2: sti-uni-player@8D82000 { compatible = "st,stih407-uni-player-pcm-out"; #sound-dai-cells = <0>; st,syscfg = <&syscfg_core>; @@ -74,7 +74,7 @@ Example: dma-names = "tx"; }; - sti_uni_player3: sti-uni-player@0x8D85000 { + sti_uni_player3: sti-uni-player@8D85000 { compatible = "st,stih407-uni-player-spdif"; #sound-dai-cells = <0>; st,syscfg = <&syscfg_core>; @@ -85,7 +85,7 @@ Example: dma-names = "tx"; }; - sti_uni_reader1: sti-uni-reader@0x8D84000 { + sti_uni_reader1: sti-uni-reader@8D84000 { compatible = "st,stih407-uni-reader-hdmi"; #sound-dai-cells = <0>; st,syscfg = <&syscfg_core>; diff --git a/Documentation/devicetree/bindings/spi/efm32-spi.txt b/Documentation/devicetree/bindings/spi/efm32-spi.txt index 2c1e6a43930baa88426aa4822f6ec79ee8bbc10e..e0fa61a1be0c864258574787d8fa496db2118916 100644 --- a/Documentation/devicetree/bindings/spi/efm32-spi.txt +++ b/Documentation/devicetree/bindings/spi/efm32-spi.txt @@ -19,7 +19,7 @@ Recommended properties : Example: -spi1: spi@0x4000c400 { /* USART1 */ +spi1: spi@4000c400 { /* USART1 */ #address-cells = <1>; #size-cells = <0>; compatible = "energymicro,efm32-spi"; diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt index 5bf13960f7f4a3c826c10b1e15a618df82d82403..e3c48b20b1a691b37d0b425251a257c682a38eca 100644 --- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt +++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt @@ -12,24 +12,30 @@ Required properties: - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc - reg : Offset and length of the register set for the device - interrupts : Should contain CSPI/eCSPI interrupt -- cs-gpios : Specifies the gpio pins to be used for chipselects. - clocks : Clock specifiers for both ipg and per clocks. - clock-names : Clock names should include both "ipg" and "per" See the clock consumer binding, Documentation/devicetree/bindings/clock/clock-bindings.txt -- dmas: DMA specifiers for tx and rx dma. See the DMA client binding, - Documentation/devicetree/bindings/dma/dma.txt -- dma-names: DMA request names should include "tx" and "rx" if present. -Obsolete properties: -- fsl,spi-num-chipselects : Contains the number of the chipselect +Recommended properties: +- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt. While the native chip +select lines can be used, they appear to always generate a pulse between each +word of a transfer. Most use cases will require GPIO based chip selects to +generate a valid transaction. Optional properties: +- num-cs : Number of total chip selects, see spi-bus.txt. +- dmas: DMA specifiers for tx and rx dma. See the DMA client binding, +Documentation/devicetree/bindings/dma/dma.txt. +- dma-names: DMA request names, if present, should include "tx" and "rx". - fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register controlling the SPI_READY handling. Note that to enable the DRCTL consideration, the SPI_READY mode-flag needs to be set too. Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst). +Obsolete properties: +- fsl,spi-num-chipselects : Contains the number of the chipselect + Example: ecspi@70010000 { diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt index 88b6ea1ad2903cadd73650ff6dbfa0eb54aac39f..44d7cb2cb2c023ec8f1bbcaa90c26f7d406f9abf 100644 --- a/Documentation/devicetree/bindings/thermal/thermal.txt +++ b/Documentation/devicetree/bindings/thermal/thermal.txt @@ -239,7 +239,7 @@ cpus { * A simple fan controller which supports 10 speeds of operation * (represented as 0-9). */ - fan0: fan@0x48 { + fan0: fan@48 { ... cooling-min-level = <0>; cooling-max-level = <9>; @@ -252,7 +252,7 @@ ocp { /* * A simple IC with a single bandgap temperature sensor. */ - bandgap0: bandgap@0x0000ED00 { + bandgap0: bandgap@0000ED00 { ... #thermal-sensor-cells = <0>; }; @@ -330,7 +330,7 @@ ocp { /* * A simple IC with several bandgap temperature sensors. */ - bandgap0: bandgap@0x0000ED00 { + bandgap0: bandgap@0000ED00 { ... #thermal-sensor-cells = <1>; }; @@ -447,7 +447,7 @@ one thermal zone. /* * A simple IC with a single temperature sensor. */ - adc: sensor@0x49 { + adc: sensor@49 { ... #thermal-sensor-cells = <0>; }; @@ -458,7 +458,7 @@ ocp { /* * A simple IC with a single bandgap temperature sensor. */ - bandgap0: bandgap@0x0000ED00 { + bandgap0: bandgap@0000ED00 { ... #thermal-sensor-cells = <0>; }; @@ -516,7 +516,7 @@ with many sensors and many cooling devices. /* * An IC with several temperature sensor. */ - adc_dummy: sensor@0x50 { + adc_dummy: sensor@50 { ... #thermal-sensor-cells = <1>; /* sensor internal ID */ }; diff --git a/Documentation/devicetree/bindings/ufs/ufs-qcom.txt b/Documentation/devicetree/bindings/ufs/ufs-qcom.txt index 1f69ee1a61ea25b776671be1becb8e8f63a29eb1..21d9a93db2e970cde487151f667c93063a8ae5f9 100644 --- a/Documentation/devicetree/bindings/ufs/ufs-qcom.txt +++ b/Documentation/devicetree/bindings/ufs/ufs-qcom.txt @@ -32,7 +32,7 @@ Optional properties: Example: - ufsphy1: ufsphy@0xfc597000 { + ufsphy1: ufsphy@fc597000 { compatible = "qcom,ufs-phy-qmp-20nm"; reg = <0xfc597000 0x800>; reg-names = "phy_mem"; @@ -53,7 +53,7 @@ Example: <&clock_gcc clk_gcc_ufs_rx_cfg_clk>; }; - ufshc@0xfc598000 { + ufshc@fc598000 { ... phys = <&ufsphy1>; phy-names = "ufsphy"; diff --git a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt index a99ed5565b26c406ef6e643c99d6c1eb08fedeac..c39dfef76a183ad80eb27b36a820b3ee6c8107b6 100644 --- a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt +++ b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt @@ -46,7 +46,7 @@ Note: If above properties are not defined it can be assumed that the supply regulators or clocks are always on. Example: - ufshc@0xfc598000 { + ufshc@fc598000 { compatible = "jedec,ufs-1.1"; reg = <0xfc598000 0x800>; interrupts = <0 28 0>; diff --git a/Documentation/devicetree/bindings/usb/am33xx-usb.txt b/Documentation/devicetree/bindings/usb/am33xx-usb.txt index 7a33f22c815a76ca534bf16f5d158e6d9ae07033..7a198a30408abf3f8a164189d60248f1dfcddc8b 100644 --- a/Documentation/devicetree/bindings/usb/am33xx-usb.txt +++ b/Documentation/devicetree/bindings/usb/am33xx-usb.txt @@ -95,6 +95,7 @@ usb: usb@47400000 { reg = <0x47401300 0x100>; reg-names = "phy"; ti,ctrl_mod = <&ctrl_mod>; + #phy-cells = <0>; }; usb0: usb@47401000 { @@ -141,6 +142,7 @@ usb: usb@47400000 { reg = <0x47401b00 0x100>; reg-names = "phy"; ti,ctrl_mod = <&ctrl_mod>; + #phy-cells = <0>; }; usb1: usb@47401800 { diff --git a/Documentation/devicetree/bindings/usb/ehci-st.txt b/Documentation/devicetree/bindings/usb/ehci-st.txt index 9feea6c3e4d998cba087de49f608cba8ce445242..065c91d955ad159a1ff4cbd75d26ff86f0e52c0e 100644 --- a/Documentation/devicetree/bindings/usb/ehci-st.txt +++ b/Documentation/devicetree/bindings/usb/ehci-st.txt @@ -22,7 +22,7 @@ See: Documentation/devicetree/bindings/reset/reset.txt Example: - ehci1: usb@0xfe203e00 { + ehci1: usb@fe203e00 { compatible = "st,st-ehci-300x"; reg = <0xfe203e00 0x100>; interrupts = ; diff --git a/Documentation/devicetree/bindings/usb/ohci-st.txt b/Documentation/devicetree/bindings/usb/ohci-st.txt index d893ec9131c3bdec4cd71e632b932ffd56dfbc9f..44c998c16f85bfbfa4814fffb8dc3d344c14a3de 100644 --- a/Documentation/devicetree/bindings/usb/ohci-st.txt +++ b/Documentation/devicetree/bindings/usb/ohci-st.txt @@ -20,7 +20,7 @@ See: Documentation/devicetree/bindings/reset/reset.txt Example: - ohci0: usb@0xfe1ffc00 { + ohci0: usb@fe1ffc00 { compatible = "st,st-ohci-300x"; reg = <0xfe1ffc00 0x100>; interrupts = ; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 41cb1ff0715072a132f54b6b33421032c59e5399..159dc5c075c2bcfbd3ace6257d59737555189abd 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -173,6 +173,7 @@ itead ITEAD Intelligent Systems Co.Ltd iwave iWave Systems Technologies Pvt. Ltd. jdi Japan Display Inc. jedec JEDEC Solid State Technology Association +jianda Jiandangjing Technology Co., Ltd. karo Ka-Ro electronics GmbH keithkoep Keith & Koep GmbH keymile Keymile GmbH @@ -380,6 +381,7 @@ virtio Virtual I/O Device Specification, developed by the OASIS consortium vivante Vivante Corporation vocore VoCore Studio voipac Voipac Technologies s.r.o. +vot Vision Optical Technology Co., Ltd. wd Western Digital Corp. wetek WeTek Electronics, limited. wexler Wexler diff --git a/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt b/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt index e27763ef00497d8645463d084a04e3f07d0c939f..3c7a1cd13b1011f37753f67905561843ce984370 100644 --- a/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt @@ -6,7 +6,7 @@ reg: Register address and length for watchdog registers Example: -watchdog: jz4740-watchdog@0x10002000 { +watchdog: jz4740-watchdog@10002000 { compatible = "ingenic,jz4740-watchdog"; reg = <0x10002000 0x100>; }; diff --git a/Documentation/driver-api/dmaengine/client.rst b/Documentation/driver-api/dmaengine/client.rst index 6245c99af8c1157176fd1d22681bc221917d3d7f..fbbb2831f29f8c7f50675238289478c98f472606 100644 --- a/Documentation/driver-api/dmaengine/client.rst +++ b/Documentation/driver-api/dmaengine/client.rst @@ -185,7 +185,7 @@ The details of these operations are: void dma_async_issue_pending(struct dma_chan *chan); Further APIs: ------------- +------------- 1. Terminate APIs diff --git a/Documentation/driver-api/pci.rst b/Documentation/driver-api/pci.rst index 01a6c8b7d3a7b59dcee9a11d828968e464af9c1c..ca85e5e78b2c439b3279fbe797b67eeaea84b141 100644 --- a/Documentation/driver-api/pci.rst +++ b/Documentation/driver-api/pci.rst @@ -25,9 +25,6 @@ PCI Support Library .. kernel-doc:: drivers/pci/irq.c :export: -.. kernel-doc:: drivers/pci/htirq.c - :export: - .. kernel-doc:: drivers/pci/probe.c :export: diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index c0727dc36271e99eaf844c205fff6d901c3d057b..f2f3f8592a6f5e12bdcd2d56005deaf52030d0f7 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt @@ -25,8 +25,8 @@ available from the following download page. At least "mkfs.nilfs2", cleaner or garbage collector) are required. Details on the tools are described in the man pages included in the package. -Project web page: http://nilfs.sourceforge.net/ -Download page: http://nilfs.sourceforge.net/en/download.html +Project web page: https://nilfs.sourceforge.io/ +Download page: https://nilfs.sourceforge.io/en/download.html List info: http://vger.kernel.org/vger-lists.html#linux-nilfs Caveats diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt index 8caa60734647f70a777b8a568ba9f2dd99182fb8..e6a5f4912b6d4a4910ed1d2fc494fff304ab47ff 100644 --- a/Documentation/filesystems/overlayfs.txt +++ b/Documentation/filesystems/overlayfs.txt @@ -156,6 +156,40 @@ handle it in two different ways: root of the overlay. Finally the directory is moved to the new location. +There are several ways to tune the "redirect_dir" feature. + +Kernel config options: + +- OVERLAY_FS_REDIRECT_DIR: + If this is enabled, then redirect_dir is turned on by default. +- OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW: + If this is enabled, then redirects are always followed by default. Enabling + this results in a less secure configuration. Enable this option only when + worried about backward compatibility with kernels that have the redirect_dir + feature and follow redirects even if turned off. + +Module options (can also be changed through /sys/module/overlay/parameters/*): + +- "redirect_dir=BOOL": + See OVERLAY_FS_REDIRECT_DIR kernel config option above. +- "redirect_always_follow=BOOL": + See OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW kernel config option above. +- "redirect_max=NUM": + The maximum number of bytes in an absolute redirect (default is 256). + +Mount options: + +- "redirect_dir=on": + Redirects are enabled. +- "redirect_dir=follow": + Redirects are not created, but followed. +- "redirect_dir=off": + Redirects are not created and only followed if "redirect_always_follow" + feature is enabled in the kernel/module config. +- "redirect_dir=nofollow": + Redirects are not created and not followed (equivalent to "redirect_dir=off" + if "redirect_always_follow" feature is not enabled). + Non-directories --------------- diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index 3ea622876b67ac1388bf2c58aeb71d9ca598b4b4..e37557b30f620604dfddffa2f00e1d303e147f7d 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -74,15 +74,6 @@ Helper Functions Reference .. kernel-doc:: drivers/gpu/drm/drm_atomic_helper.c :export: -Legacy CRTC/Modeset Helper Functions Reference -============================================== - -.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c - :doc: overview - -.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c - :export: - Simple KMS Helper Reference =========================== @@ -282,15 +273,6 @@ Flip-work Helper Reference .. kernel-doc:: drivers/gpu/drm/drm_flip_work.c :export: -Plane Helper Reference -====================== - -.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c - :doc: overview - -.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c - :export: - Auxiliary Modeset Helpers ========================= @@ -308,3 +290,21 @@ Framebuffer GEM Helper Reference .. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c :export: + +Legacy Plane Helper Reference +============================= + +.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c + :doc: overview + +.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c + :export: + +Legacy CRTC/Modeset Helper Functions Reference +============================================== + +.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c + :doc: overview + +.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c + :export: diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index 307284125d7aa964522f85ea6fb34247e67c9de2..2dcf5b42015debbe9c047c61f2a0c35df7d02d65 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -263,14 +263,20 @@ Taken all together there's two consequences for the atomic design: - An atomic update is assembled and validated as an entirely free-standing pile of structures within the :c:type:`drm_atomic_state ` - container. Again drivers can subclass that container for their own state - structure tracking needs. Only when a state is committed is it applied to the - driver and modeset objects. This way rolling back an update boils down to - releasing memory and unreferencing objects like framebuffers. + container. Driver private state structures are also tracked in the same + structure; see the next chapter. Only when a state is committed is it applied + to the driver and modeset objects. This way rolling back an update boils down + to releasing memory and unreferencing objects like framebuffers. Read on in this chapter, and also in :ref:`drm_atomic_helper` for more detailed coverage of specific topics. +Handling Driver Private State +----------------------------- + +.. kernel-doc:: drivers/gpu/drm/drm_atomic.c + :doc: handling driver private state + Atomic Mode Setting Function Reference -------------------------------------- diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index af614746d9c53d67eacdef9b3ea36932294208c7..1e593370f64f061b550e273da46e1cd96645f244 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -194,6 +194,24 @@ drm_mode_config_helper_suspend/resume(). Contact: Maintainer of the driver you plan to convert +Convert drivers to use drm_fb_helper_fbdev_setup/teardown() +----------------------------------------------------------- + +Most drivers can use drm_fb_helper_fbdev_setup() except maybe: + +- amdgpu which has special logic to decide whether to call + drm_helper_disable_unused_functions() + +- armada which isn't atomic and doesn't call + drm_helper_disable_unused_functions() + +- i915 which calls drm_fb_helper_initial_config() in a worker + +Drivers that use drm_framebuffer_remove() to clean up the fbdev framebuffer can +probably use drm_fb_helper_fbdev_teardown(). + +Contact: Maintainer of the driver you plan to convert + Core refactorings ================= @@ -395,11 +413,6 @@ those drivers as simple as possible, so lots of room for refactoring: one of the ideas for having a shared dsi/dbi helper, abstracting away the transport details more. -- tinydrm_lastclose could be drm_fb_helper_lastclose. Only thing we need - for that is to store the drm_fb_helper pointer somewhere in - drm_device->mode_config. And then we could roll that out to all the - drivers. - - tinydrm_gem_cma_prime_import_sg_table should probably go into the cma helpers, as a _vmapped variant (since not every driver needs the vmap). And tinydrm_gem_cma_free_object could the be merged into diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt index 262722d8867b2aa9f70f137529dd298e5e36af60..c4a293a03c337f5080d9cf682f64428fc74a1d79 100644 --- a/Documentation/kbuild/kconfig-language.txt +++ b/Documentation/kbuild/kconfig-language.txt @@ -200,10 +200,14 @@ module state. Dependency expressions have the following syntax: ::= (1) '=' (2) '!=' (3) - '(' ')' (4) - '!' (5) - '&&' (6) - '||' (7) + '<' (4) + '>' (4) + '<=' (4) + '>=' (4) + '(' ')' (5) + '!' (6) + '&&' (7) + '||' (8) Expressions are listed in decreasing order of precedence. @@ -214,10 +218,13 @@ Expressions are listed in decreasing order of precedence. otherwise 'n'. (3) If the values of both symbols are equal, it returns 'n', otherwise 'y'. -(4) Returns the value of the expression. Used to override precedence. -(5) Returns the result of (2-/expr/). -(6) Returns the result of min(/expr/, /expr/). -(7) Returns the result of max(/expr/, /expr/). +(4) If value of is respectively lower, greater, lower-or-equal, + or greater-or-equal than value of , it returns 'y', + otherwise 'n'. +(5) Returns the value of the expression. Used to override precedence. +(6) Returns the result of (2-/expr/). +(7) Returns the result of min(/expr/, /expr/). +(8) Returns the result of max(/expr/, /expr/). An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2 respectively for calculations). A menu entry becomes visible when its diff --git a/Documentation/locking/crossrelease.txt b/Documentation/locking/crossrelease.txt deleted file mode 100644 index bdf1423d5f99e9b5a65e293049fdf0af7675f3b7..0000000000000000000000000000000000000000 --- a/Documentation/locking/crossrelease.txt +++ /dev/null @@ -1,874 +0,0 @@ -Crossrelease -============ - -Started by Byungchul Park - -Contents: - - (*) Background - - - What causes deadlock - - How lockdep works - - (*) Limitation - - - Limit lockdep - - Pros from the limitation - - Cons from the limitation - - Relax the limitation - - (*) Crossrelease - - - Introduce crossrelease - - Introduce commit - - (*) Implementation - - - Data structures - - How crossrelease works - - (*) Optimizations - - - Avoid duplication - - Lockless for hot paths - - (*) APPENDIX A: What lockdep does to work aggresively - - (*) APPENDIX B: How to avoid adding false dependencies - - -========== -Background -========== - -What causes deadlock --------------------- - -A deadlock occurs when a context is waiting for an event to happen, -which is impossible because another (or the) context who can trigger the -event is also waiting for another (or the) event to happen, which is -also impossible due to the same reason. - -For example: - - A context going to trigger event C is waiting for event A to happen. - A context going to trigger event A is waiting for event B to happen. - A context going to trigger event B is waiting for event C to happen. - -A deadlock occurs when these three wait operations run at the same time, -because event C cannot be triggered if event A does not happen, which in -turn cannot be triggered if event B does not happen, which in turn -cannot be triggered if event C does not happen. After all, no event can -be triggered since any of them never meets its condition to wake up. - -A dependency might exist between two waiters and a deadlock might happen -due to an incorrect releationship between dependencies. Thus, we must -define what a dependency is first. A dependency exists between them if: - - 1. There are two waiters waiting for each event at a given time. - 2. The only way to wake up each waiter is to trigger its event. - 3. Whether one can be woken up depends on whether the other can. - -Each wait in the example creates its dependency like: - - Event C depends on event A. - Event A depends on event B. - Event B depends on event C. - - NOTE: Precisely speaking, a dependency is one between whether a - waiter for an event can be woken up and whether another waiter for - another event can be woken up. However from now on, we will describe - a dependency as if it's one between an event and another event for - simplicity. - -And they form circular dependencies like: - - -> C -> A -> B - - / \ - \ / - ---------------- - - where 'A -> B' means that event A depends on event B. - -Such circular dependencies lead to a deadlock since no waiter can meet -its condition to wake up as described. - -CONCLUSION - -Circular dependencies cause a deadlock. - - -How lockdep works ------------------ - -Lockdep tries to detect a deadlock by checking dependencies created by -lock operations, acquire and release. Waiting for a lock corresponds to -waiting for an event, and releasing a lock corresponds to triggering an -event in the previous section. - -In short, lockdep does: - - 1. Detect a new dependency. - 2. Add the dependency into a global graph. - 3. Check if that makes dependencies circular. - 4. Report a deadlock or its possibility if so. - -For example, consider a graph built by lockdep that looks like: - - A -> B - - \ - -> E - / - C -> D - - - where A, B,..., E are different lock classes. - -Lockdep will add a dependency into the graph on detection of a new -dependency. For example, it will add a dependency 'E -> C' when a new -dependency between lock E and lock C is detected. Then the graph will be: - - A -> B - - \ - -> E - - / \ - -> C -> D - \ - / / - \ / - ------------------ - - where A, B,..., E are different lock classes. - -This graph contains a subgraph which demonstrates circular dependencies: - - -> E - - / \ - -> C -> D - \ - / / - \ / - ------------------ - - where C, D and E are different lock classes. - -This is the condition under which a deadlock might occur. Lockdep -reports it on detection after adding a new dependency. This is the way -how lockdep works. - -CONCLUSION - -Lockdep detects a deadlock or its possibility by checking if circular -dependencies were created after adding each new dependency. - - -========== -Limitation -========== - -Limit lockdep -------------- - -Limiting lockdep to work on only typical locks e.g. spin locks and -mutexes, which are released within the acquire context, the -implementation becomes simple but its capacity for detection becomes -limited. Let's check pros and cons in next section. - - -Pros from the limitation ------------------------- - -Given the limitation, when acquiring a lock, locks in a held_locks -cannot be released if the context cannot acquire it so has to wait to -acquire it, which means all waiters for the locks in the held_locks are -stuck. It's an exact case to create dependencies between each lock in -the held_locks and the lock to acquire. - -For example: - - CONTEXT X - --------- - acquire A - acquire B /* Add a dependency 'A -> B' */ - release B - release A - - where A and B are different lock classes. - -When acquiring lock A, the held_locks of CONTEXT X is empty thus no -dependency is added. But when acquiring lock B, lockdep detects and adds -a new dependency 'A -> B' between lock A in the held_locks and lock B. -They can be simply added whenever acquiring each lock. - -And data required by lockdep exists in a local structure, held_locks -embedded in task_struct. Forcing to access the data within the context, -lockdep can avoid racy problems without explicit locks while handling -the local data. - -Lastly, lockdep only needs to keep locks currently being held, to build -a dependency graph. However, relaxing the limitation, it needs to keep -even locks already released, because a decision whether they created -dependencies might be long-deferred. - -To sum up, we can expect several advantages from the limitation: - - 1. Lockdep can easily identify a dependency when acquiring a lock. - 2. Races are avoidable while accessing local locks in a held_locks. - 3. Lockdep only needs to keep locks currently being held. - -CONCLUSION - -Given the limitation, the implementation becomes simple and efficient. - - -Cons from the limitation ------------------------- - -Given the limitation, lockdep is applicable only to typical locks. For -example, page locks for page access or completions for synchronization -cannot work with lockdep. - -Can we detect deadlocks below, under the limitation? - -Example 1: - - CONTEXT X CONTEXT Y CONTEXT Z - --------- --------- ---------- - mutex_lock A - lock_page B - lock_page B - mutex_lock A /* DEADLOCK */ - unlock_page B held by X - unlock_page B - mutex_unlock A - mutex_unlock A - - where A and B are different lock classes. - -No, we cannot. - -Example 2: - - CONTEXT X CONTEXT Y - --------- --------- - mutex_lock A - mutex_lock A - wait_for_complete B /* DEADLOCK */ - complete B - mutex_unlock A - mutex_unlock A - - where A is a lock class and B is a completion variable. - -No, we cannot. - -CONCLUSION - -Given the limitation, lockdep cannot detect a deadlock or its -possibility caused by page locks or completions. - - -Relax the limitation --------------------- - -Under the limitation, things to create dependencies are limited to -typical locks. However, synchronization primitives like page locks and -completions, which are allowed to be released in any context, also -create dependencies and can cause a deadlock. So lockdep should track -these locks to do a better job. We have to relax the limitation for -these locks to work with lockdep. - -Detecting dependencies is very important for lockdep to work because -adding a dependency means adding an opportunity to check whether it -causes a deadlock. The more lockdep adds dependencies, the more it -thoroughly works. Thus Lockdep has to do its best to detect and add as -many true dependencies into a graph as possible. - -For example, considering only typical locks, lockdep builds a graph like: - - A -> B - - \ - -> E - / - C -> D - - - where A, B,..., E are different lock classes. - -On the other hand, under the relaxation, additional dependencies might -be created and added. Assuming additional 'FX -> C' and 'E -> GX' are -added thanks to the relaxation, the graph will be: - - A -> B - - \ - -> E -> GX - / - FX -> C -> D - - - where A, B,..., E, FX and GX are different lock classes, and a suffix - 'X' is added on non-typical locks. - -The latter graph gives us more chances to check circular dependencies -than the former. However, it might suffer performance degradation since -relaxing the limitation, with which design and implementation of lockdep -can be efficient, might introduce inefficiency inevitably. So lockdep -should provide two options, strong detection and efficient detection. - -Choosing efficient detection: - - Lockdep works with only locks restricted to be released within the - acquire context. However, lockdep works efficiently. - -Choosing strong detection: - - Lockdep works with all synchronization primitives. However, lockdep - suffers performance degradation. - -CONCLUSION - -Relaxing the limitation, lockdep can add additional dependencies giving -additional opportunities to check circular dependencies. - - -============ -Crossrelease -============ - -Introduce crossrelease ----------------------- - -In order to allow lockdep to handle additional dependencies by what -might be released in any context, namely 'crosslock', we have to be able -to identify those created by crosslocks. The proposed 'crossrelease' -feature provoides a way to do that. - -Crossrelease feature has to do: - - 1. Identify dependencies created by crosslocks. - 2. Add the dependencies into a dependency graph. - -That's all. Once a meaningful dependency is added into graph, then -lockdep would work with the graph as it did. The most important thing -crossrelease feature has to do is to correctly identify and add true -dependencies into the global graph. - -A dependency e.g. 'A -> B' can be identified only in the A's release -context because a decision required to identify the dependency can be -made only in the release context. That is to decide whether A can be -released so that a waiter for A can be woken up. It cannot be made in -other than the A's release context. - -It's no matter for typical locks because each acquire context is same as -its release context, thus lockdep can decide whether a lock can be -released in the acquire context. However for crosslocks, lockdep cannot -make the decision in the acquire context but has to wait until the -release context is identified. - -Therefore, deadlocks by crosslocks cannot be detected just when it -happens, because those cannot be identified until the crosslocks are -released. However, deadlock possibilities can be detected and it's very -worth. See 'APPENDIX A' section to check why. - -CONCLUSION - -Using crossrelease feature, lockdep can work with what might be released -in any context, namely crosslock. - - -Introduce commit ----------------- - -Since crossrelease defers the work adding true dependencies of -crosslocks until they are actually released, crossrelease has to queue -all acquisitions which might create dependencies with the crosslocks. -Then it identifies dependencies using the queued data in batches at a -proper time. We call it 'commit'. - -There are four types of dependencies: - -1. TT type: 'typical lock A -> typical lock B' - - Just when acquiring B, lockdep can see it's in the A's release - context. So the dependency between A and B can be identified - immediately. Commit is unnecessary. - -2. TC type: 'typical lock A -> crosslock BX' - - Just when acquiring BX, lockdep can see it's in the A's release - context. So the dependency between A and BX can be identified - immediately. Commit is unnecessary, too. - -3. CT type: 'crosslock AX -> typical lock B' - - When acquiring B, lockdep cannot identify the dependency because - there's no way to know if it's in the AX's release context. It has - to wait until the decision can be made. Commit is necessary. - -4. CC type: 'crosslock AX -> crosslock BX' - - When acquiring BX, lockdep cannot identify the dependency because - there's no way to know if it's in the AX's release context. It has - to wait until the decision can be made. Commit is necessary. - But, handling CC type is not implemented yet. It's a future work. - -Lockdep can work without commit for typical locks, but commit step is -necessary once crosslocks are involved. Introducing commit, lockdep -performs three steps. What lockdep does in each step is: - -1. Acquisition: For typical locks, lockdep does what it originally did - and queues the lock so that CT type dependencies can be checked using - it at the commit step. For crosslocks, it saves data which will be - used at the commit step and increases a reference count for it. - -2. Commit: No action is reauired for typical locks. For crosslocks, - lockdep adds CT type dependencies using the data saved at the - acquisition step. - -3. Release: No changes are required for typical locks. When a crosslock - is released, it decreases a reference count for it. - -CONCLUSION - -Crossrelease introduces commit step to handle dependencies of crosslocks -in batches at a proper time. - - -============== -Implementation -============== - -Data structures ---------------- - -Crossrelease introduces two main data structures. - -1. hist_lock - - This is an array embedded in task_struct, for keeping lock history so - that dependencies can be added using them at the commit step. Since - it's local data, it can be accessed locklessly in the owner context. - The array is filled at the acquisition step and consumed at the - commit step. And it's managed in circular manner. - -2. cross_lock - - One per lockdep_map exists. This is for keeping data of crosslocks - and used at the commit step. - - -How crossrelease works ----------------------- - -It's the key of how crossrelease works, to defer necessary works to an -appropriate point in time and perform in at once at the commit step. -Let's take a look with examples step by step, starting from how lockdep -works without crossrelease for typical locks. - - acquire A /* Push A onto held_locks */ - acquire B /* Push B onto held_locks and add 'A -> B' */ - acquire C /* Push C onto held_locks and add 'B -> C' */ - release C /* Pop C from held_locks */ - release B /* Pop B from held_locks */ - release A /* Pop A from held_locks */ - - where A, B and C are different lock classes. - - NOTE: This document assumes that readers already understand how - lockdep works without crossrelease thus omits details. But there's - one thing to note. Lockdep pretends to pop a lock from held_locks - when releasing it. But it's subtly different from the original pop - operation because lockdep allows other than the top to be poped. - -In this case, lockdep adds 'the top of held_locks -> the lock to acquire' -dependency every time acquiring a lock. - -After adding 'A -> B', a dependency graph will be: - - A -> B - - where A and B are different lock classes. - -And after adding 'B -> C', the graph will be: - - A -> B -> C - - where A, B and C are different lock classes. - -Let's performs commit step even for typical locks to add dependencies. -Of course, commit step is not necessary for them, however, it would work -well because this is a more general way. - - acquire A - /* - * Queue A into hist_locks - * - * In hist_locks: A - * In graph: Empty - */ - - acquire B - /* - * Queue B into hist_locks - * - * In hist_locks: A, B - * In graph: Empty - */ - - acquire C - /* - * Queue C into hist_locks - * - * In hist_locks: A, B, C - * In graph: Empty - */ - - commit C - /* - * Add 'C -> ?' - * Answer the following to decide '?' - * What has been queued since acquire C: Nothing - * - * In hist_locks: A, B, C - * In graph: Empty - */ - - release C - - commit B - /* - * Add 'B -> ?' - * Answer the following to decide '?' - * What has been queued since acquire B: C - * - * In hist_locks: A, B, C - * In graph: 'B -> C' - */ - - release B - - commit A - /* - * Add 'A -> ?' - * Answer the following to decide '?' - * What has been queued since acquire A: B, C - * - * In hist_locks: A, B, C - * In graph: 'B -> C', 'A -> B', 'A -> C' - */ - - release A - - where A, B and C are different lock classes. - -In this case, dependencies are added at the commit step as described. - -After commits for A, B and C, the graph will be: - - A -> B -> C - - where A, B and C are different lock classes. - - NOTE: A dependency 'A -> C' is optimized out. - -We can see the former graph built without commit step is same as the -latter graph built using commit steps. Of course the former way leads to -earlier finish for building the graph, which means we can detect a -deadlock or its possibility sooner. So the former way would be prefered -when possible. But we cannot avoid using the latter way for crosslocks. - -Let's look at how commit steps work for crosslocks. In this case, the -commit step is performed only on crosslock AX as real. And it assumes -that the AX release context is different from the AX acquire context. - - BX RELEASE CONTEXT BX ACQUIRE CONTEXT - ------------------ ------------------ - acquire A - /* - * Push A onto held_locks - * Queue A into hist_locks - * - * In held_locks: A - * In hist_locks: A - * In graph: Empty - */ - - acquire BX - /* - * Add 'the top of held_locks -> BX' - * - * In held_locks: A - * In hist_locks: A - * In graph: 'A -> BX' - */ - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - It must be guaranteed that the following operations are seen after - acquiring BX globally. It can be done by things like barrier. - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - acquire C - /* - * Push C onto held_locks - * Queue C into hist_locks - * - * In held_locks: C - * In hist_locks: C - * In graph: 'A -> BX' - */ - - release C - /* - * Pop C from held_locks - * - * In held_locks: Empty - * In hist_locks: C - * In graph: 'A -> BX' - */ - acquire D - /* - * Push D onto held_locks - * Queue D into hist_locks - * Add 'the top of held_locks -> D' - * - * In held_locks: A, D - * In hist_locks: A, D - * In graph: 'A -> BX', 'A -> D' - */ - acquire E - /* - * Push E onto held_locks - * Queue E into hist_locks - * - * In held_locks: E - * In hist_locks: C, E - * In graph: 'A -> BX', 'A -> D' - */ - - release E - /* - * Pop E from held_locks - * - * In held_locks: Empty - * In hist_locks: D, E - * In graph: 'A -> BX', 'A -> D' - */ - release D - /* - * Pop D from held_locks - * - * In held_locks: A - * In hist_locks: A, D - * In graph: 'A -> BX', 'A -> D' - */ - commit BX - /* - * Add 'BX -> ?' - * What has been queued since acquire BX: C, E - * - * In held_locks: Empty - * In hist_locks: D, E - * In graph: 'A -> BX', 'A -> D', - * 'BX -> C', 'BX -> E' - */ - - release BX - /* - * In held_locks: Empty - * In hist_locks: D, E - * In graph: 'A -> BX', 'A -> D', - * 'BX -> C', 'BX -> E' - */ - release A - /* - * Pop A from held_locks - * - * In held_locks: Empty - * In hist_locks: A, D - * In graph: 'A -> BX', 'A -> D', - * 'BX -> C', 'BX -> E' - */ - - where A, BX, C,..., E are different lock classes, and a suffix 'X' is - added on crosslocks. - -Crossrelease considers all acquisitions after acqiuring BX are -candidates which might create dependencies with BX. True dependencies -will be determined when identifying the release context of BX. Meanwhile, -all typical locks are queued so that they can be used at the commit step. -And then two dependencies 'BX -> C' and 'BX -> E' are added at the -commit step when identifying the release context. - -The final graph will be, with crossrelease: - - -> C - / - -> BX - - / \ - A - -> E - \ - -> D - - where A, BX, C,..., E are different lock classes, and a suffix 'X' is - added on crosslocks. - -However, the final graph will be, without crossrelease: - - A -> D - - where A and D are different lock classes. - -The former graph has three more dependencies, 'A -> BX', 'BX -> C' and -'BX -> E' giving additional opportunities to check if they cause -deadlocks. This way lockdep can detect a deadlock or its possibility -caused by crosslocks. - -CONCLUSION - -We checked how crossrelease works with several examples. - - -============= -Optimizations -============= - -Avoid duplication ------------------ - -Crossrelease feature uses a cache like what lockdep already uses for -dependency chains, but this time it's for caching CT type dependencies. -Once that dependency is cached, the same will never be added again. - - -Lockless for hot paths ----------------------- - -To keep all locks for later use at the commit step, crossrelease adopts -a local array embedded in task_struct, which makes access to the data -lockless by forcing it to happen only within the owner context. It's -like how lockdep handles held_locks. Lockless implmentation is important -since typical locks are very frequently acquired and released. - - -================================================= -APPENDIX A: What lockdep does to work aggresively -================================================= - -A deadlock actually occurs when all wait operations creating circular -dependencies run at the same time. Even though they don't, a potential -deadlock exists if the problematic dependencies exist. Thus it's -meaningful to detect not only an actual deadlock but also its potential -possibility. The latter is rather valuable. When a deadlock occurs -actually, we can identify what happens in the system by some means or -other even without lockdep. However, there's no way to detect possiblity -without lockdep unless the whole code is parsed in head. It's terrible. -Lockdep does the both, and crossrelease only focuses on the latter. - -Whether or not a deadlock actually occurs depends on several factors. -For example, what order contexts are switched in is a factor. Assuming -circular dependencies exist, a deadlock would occur when contexts are -switched so that all wait operations creating the dependencies run -simultaneously. Thus to detect a deadlock possibility even in the case -that it has not occured yet, lockdep should consider all possible -combinations of dependencies, trying to: - -1. Use a global dependency graph. - - Lockdep combines all dependencies into one global graph and uses them, - regardless of which context generates them or what order contexts are - switched in. Aggregated dependencies are only considered so they are - prone to be circular if a problem exists. - -2. Check dependencies between classes instead of instances. - - What actually causes a deadlock are instances of lock. However, - lockdep checks dependencies between classes instead of instances. - This way lockdep can detect a deadlock which has not happened but - might happen in future by others but the same class. - -3. Assume all acquisitions lead to waiting. - - Although locks might be acquired without waiting which is essential - to create dependencies, lockdep assumes all acquisitions lead to - waiting since it might be true some time or another. - -CONCLUSION - -Lockdep detects not only an actual deadlock but also its possibility, -and the latter is more valuable. - - -================================================== -APPENDIX B: How to avoid adding false dependencies -================================================== - -Remind what a dependency is. A dependency exists if: - - 1. There are two waiters waiting for each event at a given time. - 2. The only way to wake up each waiter is to trigger its event. - 3. Whether one can be woken up depends on whether the other can. - -For example: - - acquire A - acquire B /* A dependency 'A -> B' exists */ - release B - release A - - where A and B are different lock classes. - -A depedency 'A -> B' exists since: - - 1. A waiter for A and a waiter for B might exist when acquiring B. - 2. Only way to wake up each is to release what it waits for. - 3. Whether the waiter for A can be woken up depends on whether the - other can. IOW, TASK X cannot release A if it fails to acquire B. - -For another example: - - TASK X TASK Y - ------ ------ - acquire AX - acquire B /* A dependency 'AX -> B' exists */ - release B - release AX held by Y - - where AX and B are different lock classes, and a suffix 'X' is added - on crosslocks. - -Even in this case involving crosslocks, the same rule can be applied. A -depedency 'AX -> B' exists since: - - 1. A waiter for AX and a waiter for B might exist when acquiring B. - 2. Only way to wake up each is to release what it waits for. - 3. Whether the waiter for AX can be woken up depends on whether the - other can. IOW, TASK X cannot release AX if it fails to acquire B. - -Let's take a look at more complicated example: - - TASK X TASK Y - ------ ------ - acquire B - release B - fork Y - acquire AX - acquire C /* A dependency 'AX -> C' exists */ - release C - release AX held by Y - - where AX, B and C are different lock classes, and a suffix 'X' is - added on crosslocks. - -Does a dependency 'AX -> B' exist? Nope. - -Two waiters are essential to create a dependency. However, waiters for -AX and B to create 'AX -> B' cannot exist at the same time in this -example. Thus the dependency 'AX -> B' cannot be created. - -It would be ideal if the full set of true ones can be considered. But -we can ensure nothing but what actually happened. Relying on what -actually happens at runtime, we can anyway add only true ones, though -they might be a subset of true ones. It's similar to how lockdep works -for typical locks. There might be more true dependencies than what -lockdep has detected in runtime. Lockdep has no choice but to rely on -what actually happens. Crossrelease also relies on it. - -CONCLUSION - -Relying on what actually happens, lockdep can avoid adding false -dependencies. diff --git a/Documentation/media/dvb-drivers/frontends.rst b/Documentation/media/dvb-drivers/frontends.rst new file mode 100644 index 0000000000000000000000000000000000000000..1f5f5798919627411ec88a2a96097d5fa6444c8a --- /dev/null +++ b/Documentation/media/dvb-drivers/frontends.rst @@ -0,0 +1,30 @@ +**************** +Frontend drivers +**************** + +Frontend attach headers +*********************** + +.. Keep it on alphabetic order + +.. kernel-doc:: drivers/media/dvb-frontends/a8293.h +.. kernel-doc:: drivers/media/dvb-frontends/af9013.h +.. kernel-doc:: drivers/media/dvb-frontends/ascot2e.h +.. kernel-doc:: drivers/media/dvb-frontends/cxd2820r.h +.. kernel-doc:: drivers/media/dvb-frontends/drxk.h +.. kernel-doc:: drivers/media/dvb-frontends/dvb-pll.h +.. kernel-doc:: drivers/media/dvb-frontends/helene.h +.. kernel-doc:: drivers/media/dvb-frontends/horus3a.h +.. kernel-doc:: drivers/media/dvb-frontends/ix2505v.h +.. kernel-doc:: drivers/media/dvb-frontends/m88ds3103.h +.. kernel-doc:: drivers/media/dvb-frontends/mb86a20s.h +.. kernel-doc:: drivers/media/dvb-frontends/mn88472.h +.. kernel-doc:: drivers/media/dvb-frontends/rtl2830.h +.. kernel-doc:: drivers/media/dvb-frontends/rtl2832.h +.. kernel-doc:: drivers/media/dvb-frontends/rtl2832_sdr.h +.. kernel-doc:: drivers/media/dvb-frontends/stb6000.h +.. kernel-doc:: drivers/media/dvb-frontends/tda10071.h +.. kernel-doc:: drivers/media/dvb-frontends/tda826x.h +.. kernel-doc:: drivers/media/dvb-frontends/zd1301_demod.h +.. kernel-doc:: drivers/media/dvb-frontends/zl10036.h + diff --git a/Documentation/media/dvb-drivers/index.rst b/Documentation/media/dvb-drivers/index.rst index 376141143ae91d46a953b49bc9338fd7c9d9b203..314e127d82e310e42f97a96a21dbf566db6b43ab 100644 --- a/Documentation/media/dvb-drivers/index.rst +++ b/Documentation/media/dvb-drivers/index.rst @@ -41,4 +41,5 @@ For more details see the file COPYING in the source distribution of Linux. technisat ttusb-dec udev + frontends contributors diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 66e62086624501fe76c170f77efc1cbc0844032f..7d4b15977d61201eb8b265293d3d95a6b042c7e6 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -9,6 +9,7 @@ Contents: batman-adv kapi z8530book + msg_zerocopy .. only:: subproject @@ -16,4 +17,3 @@ Contents: ======= * :ref:`genindex` - diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst index 77f6d7e25cfda65ac36e0c47bb2f8bdacc2fee2a..291a012649678035b5d0fdc306904093ceedf610 100644 --- a/Documentation/networking/msg_zerocopy.rst +++ b/Documentation/networking/msg_zerocopy.rst @@ -72,6 +72,10 @@ this flag, a process must first signal intent by setting a socket option: if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one))) error(1, errno, "setsockopt zerocopy"); +Setting the socket option only works when the socket is in its initial +(TCP_CLOSED) state. Trying to set the option for a socket returned by accept(), +for example, will lead to an EBUSY error. In this case, the option should be set +to the listening socket and it will be inherited by the accepted sockets. Transmission ------------ diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt index 6338400eed73d7e2f8b90807186e15d54cd16cde..2c31d9ee6776ea7fef125d8fd00e7f2ef8614e36 100644 --- a/Documentation/scsi/scsi_mid_low_api.txt +++ b/Documentation/scsi/scsi_mid_low_api.txt @@ -319,12 +319,12 @@ struct Scsi_Host: instance. If the reference count reaches 0 then the given instance is freed -The Scsi_device structure has had reference counting infrastructure added. -This effectively spreads the ownership of struct Scsi_device instances +The scsi_device structure has had reference counting infrastructure added. +This effectively spreads the ownership of struct scsi_device instances across the various SCSI layers which use them. Previously such instances were exclusively owned by the mid level. See the access functions declared towards the end of include/scsi/scsi_device.h . If an LLD wants to keep -a copy of a pointer to a Scsi_device instance it should use scsi_device_get() +a copy of a pointer to a scsi_device instance it should use scsi_device_get() to bump its reference count. When it is finished with the pointer it can use scsi_device_put() to decrement its reference count (and potentially delete it). diff --git a/Documentation/usb/gadget-testing.txt b/Documentation/usb/gadget-testing.txt index 441a4b9b666fbb2b2aace2cbc2f5ab542635d553..5908a21fddb6035cd25bb759bfd01ee405371163 100644 --- a/Documentation/usb/gadget-testing.txt +++ b/Documentation/usb/gadget-testing.txt @@ -693,7 +693,7 @@ such specification consists of a number of lines with an inverval value in each line. The rules stated above are best illustrated with an example: # mkdir functions/uvc.usb0/control/header/h -# cd functions/uvc.usb0/control/header/h +# cd functions/uvc.usb0/control/ # ln -s header/h class/fs # ln -s header/h class/ss # mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f670e4b9e7f33fdbb186d9b1c9186c6a2fbf7324..57d3ee9e4bde2a799715ca75871fd61b27858b0a 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2901,14 +2901,19 @@ userspace buffer and its length: struct kvm_s390_irq_state { __u64 buf; - __u32 flags; + __u32 flags; /* will stay unused for compatibility reasons */ __u32 len; - __u32 reserved[4]; + __u32 reserved[4]; /* will stay unused for compatibility reasons */ }; Userspace passes in the above struct and for each pending interrupt a struct kvm_s390_irq is copied to the provided buffer. +The structure contains a flags and a reserved field for future extensions. As +the kernel never checked for flags == 0 and QEMU never pre-zeroed flags and +reserved, these fields can not be used in the future without breaking +compatibility. + If -ENOBUFS is returned the buffer provided was too small and userspace may retry with a bigger buffer. @@ -2932,10 +2937,14 @@ containing a struct kvm_s390_irq_state: struct kvm_s390_irq_state { __u64 buf; + __u32 flags; /* will stay unused for compatibility reasons */ __u32 len; - __u32 pad; + __u32 reserved[4]; /* will stay unused for compatibility reasons */ }; +The restrictions for flags and reserved apply as well. +(see KVM_S390_GET_IRQ_STATE) + The userspace memory referenced by buf contains a struct kvm_s390_irq for each interrupt to be injected into the guest. If one of the interrupts could not be injected for some reason the diff --git a/Documentation/vm/zswap.txt b/Documentation/vm/zswap.txt index 89fff7d611ccb533a5c3d375bc94fecf3c2e0687..0b3a1148f9f0414558ed0537b4219225162ccc3a 100644 --- a/Documentation/vm/zswap.txt +++ b/Documentation/vm/zswap.txt @@ -98,5 +98,25 @@ request is made for a page in an old zpool, it is uncompressed using its original compressor. Once all pages are removed from an old zpool, the zpool and its compressor are freed. +Some of the pages in zswap are same-value filled pages (i.e. contents of the +page have same value or repetitive pattern). These pages include zero-filled +pages and they are handled differently. During store operation, a page is +checked if it is a same-value filled page before compressing it. If true, the +compressed length of the page is set to zero and the pattern or same-filled +value is stored. + +Same-value filled pages identification feature is enabled by default and can be +disabled at boot time by setting the "same_filled_pages_enabled" attribute to 0, +e.g. zswap.same_filled_pages_enabled=0. It can also be enabled and disabled at +runtime using the sysfs "same_filled_pages_enabled" attribute, e.g. + +echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled + +When zswap same-filled page identification is disabled at runtime, it will stop +checking for the same-value filled pages during store operation. However, the +existing pages which are marked as same-value filled pages remain stored +unchanged in zswap until they are either loaded or invalidated. + A debugfs interface is provided for various statistic about pool size, number -of pages stored, and various counters for the reasons pages are rejected. +of pages stored, same-value filled pages and various counters for the reasons +pages are rejected. diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt new file mode 100644 index 0000000000000000000000000000000000000000..d11eff61fc9addf6cd0ad54db5b30bac73783503 --- /dev/null +++ b/Documentation/x86/pti.txt @@ -0,0 +1,186 @@ +Overview +======== + +Page Table Isolation (pti, previously known as KAISER[1]) is a +countermeasure against attacks on the shared user/kernel address +space such as the "Meltdown" approach[2]. + +To mitigate this class of attacks, we create an independent set of +page tables for use only when running userspace applications. When +the kernel is entered via syscalls, interrupts or exceptions, the +page tables are switched to the full "kernel" copy. When the system +switches back to user mode, the user copy is used again. + +The userspace page tables contain only a minimal amount of kernel +data: only what is needed to enter/exit the kernel such as the +entry/exit functions themselves and the interrupt descriptor table +(IDT). There are a few strictly unnecessary things that get mapped +such as the first C function when entering an interrupt (see +comments in pti.c). + +This approach helps to ensure that side-channel attacks leveraging +the paging structures do not function when PTI is enabled. It can be +enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time. +Once enabled at compile-time, it can be disabled at boot with the +'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt). + +Page Table Management +===================== + +When PTI is enabled, the kernel manages two sets of page tables. +The first set is very similar to the single set which is present in +kernels without PTI. This includes a complete mapping of userspace +that the kernel can use for things like copy_to_user(). + +Although _complete_, the user portion of the kernel page tables is +crippled by setting the NX bit in the top level. This ensures +that any missed kernel->user CR3 switch will immediately crash +userspace upon executing its first instruction. + +The userspace page tables map only the kernel data needed to enter +and exit the kernel. This data is entirely contained in the 'struct +cpu_entry_area' structure which is placed in the fixmap which gives +each CPU's copy of the area a compile-time-fixed virtual address. + +For new userspace mappings, the kernel makes the entries in its +page tables like normal. The only difference is when the kernel +makes entries in the top (PGD) level. In addition to setting the +entry in the main kernel PGD, a copy of the entry is made in the +userspace page tables' PGD. + +This sharing at the PGD level also inherently shares all the lower +layers of the page tables. This leaves a single, shared set of +userspace page tables to manage. One PTE to lock, one set of +accessed bits, dirty bits, etc... + +Overhead +======== + +Protection against side-channel attacks is important. But, +this protection comes at a cost: + +1. Increased Memory Use + a. Each process now needs an order-1 PGD instead of order-0. + (Consumes an additional 4k per process). + b. The 'cpu_entry_area' structure must be 2MB in size and 2MB + aligned so that it can be mapped by setting a single PMD + entry. This consumes nearly 2MB of RAM once the kernel + is decompressed, but no space in the kernel image itself. + +2. Runtime Cost + a. CR3 manipulation to switch between the page table copies + must be done at interrupt, syscall, and exception entry + and exit (it can be skipped when the kernel is interrupted, + though.) Moves to CR3 are on the order of a hundred + cycles, and are required at every entry and exit. + b. A "trampoline" must be used for SYSCALL entry. This + trampoline depends on a smaller set of resources than the + non-PTI SYSCALL entry code, so requires mapping fewer + things into the userspace page tables. The downside is + that stacks must be switched at entry time. + d. Global pages are disabled for all kernel structures not + mapped into both kernel and userspace page tables. This + feature of the MMU allows different processes to share TLB + entries mapping the kernel. Losing the feature means more + TLB misses after a context switch. The actual loss of + performance is very small, however, never exceeding 1%. + d. Process Context IDentifiers (PCID) is a CPU feature that + allows us to skip flushing the entire TLB when switching page + tables by setting a special bit in CR3 when the page tables + are changed. This makes switching the page tables (at context + switch, or kernel entry/exit) cheaper. But, on systems with + PCID support, the context switch code must flush both the user + and kernel entries out of the TLB. The user PCID TLB flush is + deferred until the exit to userspace, minimizing the cost. + See intel.com/sdm for the gory PCID/INVPCID details. + e. The userspace page tables must be populated for each new + process. Even without PTI, the shared kernel mappings + are created by copying top-level (PGD) entries into each + new process. But, with PTI, there are now *two* kernel + mappings: one in the kernel page tables that maps everything + and one for the entry/exit structures. At fork(), we need to + copy both. + f. In addition to the fork()-time copying, there must also + be an update to the userspace PGD any time a set_pgd() is done + on a PGD used to map userspace. This ensures that the kernel + and userspace copies always map the same userspace + memory. + g. On systems without PCID support, each CR3 write flushes + the entire TLB. That means that each syscall, interrupt + or exception flushes the TLB. + h. INVPCID is a TLB-flushing instruction which allows flushing + of TLB entries for non-current PCIDs. Some systems support + PCIDs, but do not support INVPCID. On these systems, addresses + can only be flushed from the TLB for the current PCID. When + flushing a kernel address, we need to flush all PCIDs, so a + single kernel address flush will require a TLB-flushing CR3 + write upon the next use of every PCID. + +Possible Future Work +==================== +1. We can be more careful about not actually writing to CR3 + unless its value is actually changed. +2. Allow PTI to be enabled/disabled at runtime in addition to the + boot-time switching. + +Testing +======== + +To test stability of PTI, the following test procedure is recommended, +ideally doing all of these in parallel: + +1. Set CONFIG_DEBUG_ENTRY=y +2. Run several copies of all of the tools/testing/selftests/x86/ tests + (excluding MPX and protection_keys) in a loop on multiple CPUs for + several minutes. These tests frequently uncover corner cases in the + kernel entry code. In general, old kernels might cause these tests + themselves to crash, but they should never crash the kernel. +3. Run the 'perf' tool in a mode (top or record) that generates many + frequent performance monitoring non-maskable interrupts (see "NMI" + in /proc/interrupts). This exercises the NMI entry/exit code which + is known to trigger bugs in code paths that did not expect to be + interrupted, including nested NMIs. Using "-c" boosts the rate of + NMIs, and using two -c with separate counters encourages nested NMIs + and less deterministic behavior. + + while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done + +4. Launch a KVM virtual machine. +5. Run 32-bit binaries on systems supporting the SYSCALL instruction. + This has been a lightly-tested code path and needs extra scrutiny. + +Debugging +========= + +Bugs in PTI cause a few different signatures of crashes +that are worth noting here. + + * Failures of the selftests/x86 code. Usually a bug in one of the + more obscure corners of entry_64.S + * Crashes in early boot, especially around CPU bringup. Bugs + in the trampoline code or mappings cause these. + * Crashes at the first interrupt. Caused by bugs in entry_64.S, + like screwing up a page table switch. Also caused by + incorrectly mapping the IRQ handler entry code. + * Crashes at the first NMI. The NMI code is separate from main + interrupt handlers and can have bugs that do not affect + normal interrupts. Also caused by incorrectly mapping NMI + code. NMIs that interrupt the entry code must be very + careful and can be the cause of crashes that show up when + running perf. + * Kernel crashes at the first exit to userspace. entry_64.S + bugs, or failing to map some of the exit code. + * Crashes at first interrupt that interrupts userspace. The paths + in entry_64.S that return to userspace are sometimes separate + from the ones that return to the kernel. + * Double faults: overflowing the kernel stack because of page + faults upon page faults. Caused by touching non-pti-mapped + data in the entry code, or forgetting to switch to kernel + CR3 before calling into C functions which are not pti-mapped. + * Userspace segfaults early in boot, sometimes manifesting + as mount(8) failing to mount the rootfs. These have + tended to be TLB invalidation issues. Usually invalidating + the wrong PCID, or otherwise missing an invalidation. + +1. https://gruss.cc/files/kaiser.pdf +2. https://meltdownattack.com/meltdown.pdf diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 3448e675b4623ce81b5e0bc1116c52a12c411801..ea91cb61a60297ac658a4160cee200da75e6f00d 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -1,6 +1,4 @@ - - Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm @@ -14,13 +12,17 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB) ... unused hole ... + vaddr_end for KASLR +fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping +fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable) -ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls +ffffffffa0000000 - [fixmap start] (~1526 MB) module mapping space (variable) +[fixmap start] - ffffffffff5fffff kernel-internal fixmap range +ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole Virtual memory map with 5 level page tables: @@ -29,26 +31,31 @@ Virtual memory map with 5 level page tables: hole caused by [56:63] sign extension ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory -ff90000000000000 - ff91ffffffffffff (=49 bits) hole -ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space +ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI +ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB) ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) ... unused hole ... ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) +... unused hole ... + vaddr_end for KASLR +fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping ... unused hole ... ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space ... unused hole ... ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0 -ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space -ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls +ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space +[fixmap start] - ffffffffff5fffff kernel-internal fixmap range +ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole Architecture defines a 64-bit virtual address. Implementations can support less. Currently supported are 48- and 57-bit virtual addresses. Bits 63 -through to the most-significant implemented bit are set to either all ones -or all zero. This causes hole between user space and kernel addresses. +through to the most-significant implemented bit are sign extended. +This causes hole between user space and kernel addresses if you interpret them +as unsigned. The direct mapping covers all memory in the system up to the highest memory address (this means in some cases it can also include PCI memory @@ -58,19 +65,15 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of the processes using the page fault handler, with init_top_pgt as reference. -Current X86-64 implementations support up to 46 bits of address space (64 TB), -which is our current limit. This expands into MBZ space in the page tables. - We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual memory window (this size is arbitrary, it can be raised later if needed). The mappings are not part of any other kernel PGD and are only available during EFI runtime calls. -The module mapping space size changes based on the CONFIG requirements for the -following fixmap section. - Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all physical memory, vmalloc/ioremap space and virtual memory map are randomized. Their order is preserved but their base will be offset early at boot time. --Andi Kleen, Jul 2004 +Be very careful vs. KASLR when changing anything here. The KASLR address +range must not overlap with anything except the KASAN shadow area, which is +correct as KASAN disables KASLR. diff --git a/MAINTAINERS b/MAINTAINERS index 8110df7acfeab0632270ddeb033c114b10526e91..5bc088f27c83b761e9c3b6766e7c86e71c11c7e6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -554,13 +554,13 @@ S: Orphan F: Documentation/filesystems/affs.txt F: fs/affs/ -AFS FILESYSTEM & AF_RXRPC SOCKET DOMAIN +AFS FILESYSTEM M: David Howells L: linux-afs@lists.infradead.org S: Supported F: fs/afs/ -F: include/net/af_rxrpc.h -F: net/rxrpc/af_rxrpc.c +F: include/trace/events/afs.h +F: Documentation/filesystems/afs.txt W: https://www.infradead.org/~dhowells/kafs/ AGPGART DRIVER @@ -859,7 +859,8 @@ F: kernel/configs/android* ANDROID DRIVERS M: Greg Kroah-Hartman M: Arve Hjønnevåg -M: Riley Andrews +M: Todd Kjos +M: Martijn Coenen T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git L: devel@driverdev.osuosl.org S: Supported @@ -2046,7 +2047,7 @@ F: arch/arm/boot/dts/uniphier* F: arch/arm/include/asm/hardware/cache-uniphier.h F: arch/arm/mach-uniphier/ F: arch/arm/mm/cache-uniphier.c -F: arch/arm64/boot/dts/socionext/ +F: arch/arm64/boot/dts/socionext/uniphier* F: drivers/bus/uniphier-system-bus.c F: drivers/clk/uniphier/ F: drivers/gpio/gpio-uniphier.c @@ -2620,24 +2621,22 @@ F: fs/bfs/ F: include/uapi/linux/bfs_fs.h BLACKFIN ARCHITECTURE -M: Steven Miao L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) T: git git://git.code.sf.net/p/adi-linux/code W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: arch/blackfin/ BLACKFIN EMAC DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/net/ethernet/adi/ BLACKFIN MEDIA DRIVER -M: Scott Jiang L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org/ -S: Supported +S: Orphan F: drivers/media/platform/blackfin/ F: drivers/media/i2c/adv7183* F: drivers/media/i2c/vs6624* @@ -2645,25 +2644,25 @@ F: drivers/media/i2c/vs6624* BLACKFIN RTC DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/rtc/rtc-bfin.c BLACKFIN SDH DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/mmc/host/bfin_sdh.c BLACKFIN SERIAL DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/tty/serial/bfin_uart.c BLACKFIN WATCHDOG DRIVER L: adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://blackfin.uclinux.org -S: Supported +S: Orphan F: drivers/watchdog/bfin_wdt.c BLINKM RGB LED DRIVER @@ -4553,6 +4552,12 @@ S: Maintained F: drivers/gpu/drm/tinydrm/st7586.c F: Documentation/devicetree/bindings/display/st7586.txt +DRM DRIVER FOR SITRONIX ST7735R PANELS +M: David Lechner +S: Maintained +F: drivers/gpu/drm/tinydrm/st7735r.c +F: Documentation/devicetree/bindings/display/st7735r.txt + DRM DRIVER FOR TDFX VIDEO CARDS S: Orphan / Obsolete F: drivers/gpu/drm/tdfx/ @@ -4592,7 +4597,6 @@ F: include/linux/vga* DRM DRIVERS AND MISC GPU PATCHES M: Daniel Vetter -M: Jani Nikula M: Gustavo Padovan M: Sean Paul W: https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html @@ -4811,6 +4815,15 @@ S: Maintained F: drivers/gpu/drm/tinydrm/ F: include/drm/tinydrm/ +DRM TTM SUBSYSTEM +M: Christian Koenig +M: Roger He +T: git git://people.freedesktop.org/~agd5f/linux +S: Maintained +L: dri-devel@lists.freedesktop.org +F: include/drm/ttm/ +F: drivers/gpu/drm/ttm/ + DSBR100 USB FM RADIO DRIVER M: Alexey Klimov L: linux-media@vger.kernel.org @@ -5158,15 +5171,15 @@ F: sound/usb/misc/ua101.c EFI TEST DRIVER L: linux-efi@vger.kernel.org M: Ivan Hu -M: Matt Fleming +M: Ard Biesheuvel S: Maintained F: drivers/firmware/efi/test/ EFI VARIABLE FILESYSTEM M: Matthew Garrett M: Jeremy Kerr -M: Matt Fleming -T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git +M: Ard Biesheuvel +T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git L: linux-efi@vger.kernel.org S: Maintained F: fs/efivarfs/ @@ -5327,7 +5340,6 @@ S: Supported F: security/integrity/evm/ EXTENSIBLE FIRMWARE INTERFACE (EFI) -M: Matt Fleming M: Ard Biesheuvel L: linux-efi@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git @@ -5438,7 +5450,7 @@ F: drivers/media/tuners/fc2580* FCOE SUBSYSTEM (libfc, libfcoe, fcoe) M: Johannes Thumshirn -L: fcoe-devel@open-fcoe.org +L: linux-scsi@vger.kernel.org W: www.Open-FCoE.org S: Supported F: drivers/scsi/libfc/ @@ -7774,6 +7786,7 @@ F: security/keys/ KGDB / KDB /debug_core M: Jason Wessel +M: Daniel Thompson W: http://kgdb.wiki.kernel.org/ L: kgdb-bugreport@lists.sourceforge.net T: git git://git.kernel.org/pub/scm/linux/kernel/git/jwessel/kgdb.git @@ -9647,8 +9660,8 @@ F: include/uapi/linux/sunrpc/ NILFS2 FILESYSTEM M: Ryusuke Konishi L: linux-nilfs@vger.kernel.org -W: http://nilfs.sourceforge.net/ -W: http://nilfs.osdn.jp/ +W: https://nilfs.sourceforge.io/ +W: https://nilfs.osdn.jp/ T: git git://github.com/konis/nilfs2.git S: Supported F: Documentation/filesystems/nilfs2.txt @@ -10143,7 +10156,7 @@ F: drivers/irqchip/irq-ompic.c F: drivers/irqchip/irq-or1k-* OPENVSWITCH -M: Pravin Shelar +M: Pravin B Shelar L: netdev@vger.kernel.org L: dev@openvswitch.org W: http://openvswitch.org @@ -11366,6 +11379,7 @@ F: drivers/net/wireless/quantenna RADEON and AMDGPU DRM DRIVERS M: Alex Deucher M: Christian König +M: David (ChunMing) Zhou L: amd-gfx@lists.freedesktop.org T: git git://people.freedesktop.org/~agd5f/linux S: Supported @@ -11784,6 +11798,18 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jes/linux.git rtl8xxxu-deve S: Maintained F: drivers/net/wireless/realtek/rtl8xxxu/ +RXRPC SOCKETS (AF_RXRPC) +M: David Howells +L: linux-afs@lists.infradead.org +S: Supported +F: net/rxrpc/ +F: include/keys/rxrpc-type.h +F: include/net/af_rxrpc.h +F: include/trace/events/rxrpc.h +F: include/uapi/linux/rxrpc.h +F: Documentation/networking/rxrpc.txt +W: https://www.infradead.org/~dhowells/kafs/ + S3 SAVAGE FRAMEBUFFER DRIVER M: Antonino Daplas L: linux-fbdev@vger.kernel.org @@ -12637,6 +12663,14 @@ S: Maintained F: drivers/ssb/ F: include/linux/ssb/ +SONY IMX274 SENSOR DRIVER +M: Leon Luo +L: linux-media@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: drivers/media/i2c/imx274.c +F: Documentation/devicetree/bindings/media/i2c/imx274.txt + SONY MEMORYSTICK CARD SUPPORT M: Alex Dubov W: http://tifmxx.berlios.de/ @@ -13103,6 +13137,7 @@ F: drivers/dma/dw/ SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER M: Jie Deng +M: Jose Abreu L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/synopsys/ @@ -13478,6 +13513,7 @@ M: Mika Westerberg M: Yehezkel Bernat T: git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git S: Maintained +F: Documentation/admin-guide/thunderbolt.rst F: drivers/thunderbolt/ F: include/linux/thunderbolt.h @@ -13655,10 +13691,8 @@ F: drivers/net/wireless/ti/ F: include/linux/wl12xx.h TILE ARCHITECTURE -M: Chris Metcalf W: http://www.mellanox.com/repository/solutions/tile-scm/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile.git -S: Supported +S: Orphan F: arch/tile/ F: drivers/char/tile-srom.c F: drivers/edac/tile_edac.c diff --git a/Makefile b/Makefile index c988e46a53cd78cfa3c8759fada57c1ef1ffb3ec..bf5b8cbb9469db3ab4a42e636746cab82a093044 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc8 NAME = Fearless Coyote # *DOCUMENTATION* @@ -484,26 +484,6 @@ CLANG_GCC_TC := --gcc-toolchain=$(GCC_TOOLCHAIN) endif KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC) -KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) -KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) -KBUILD_CFLAGS += $(call cc-disable-warning, gnu) -KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) -# Quiet clang warning: comparison of unsigned expression < 0 is always false -KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) -# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the -# source of a reference will be _MergedGlobals and not on of the whitelisted names. -# See modpost pattern 2 -KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) -KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) -KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) -KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) -else - -# These warnings generated too much noise in a regular build. -# Use make W=1 to enable them (see scripts/Makefile.extrawarn) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) -KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) endif ifeq ($(config-targets),1) @@ -716,6 +696,29 @@ ifdef CONFIG_CC_STACKPROTECTOR endif KBUILD_CFLAGS += $(stackp-flag) +ifeq ($(cc-name),clang) +KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable) +KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) +KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) +# Quiet clang warning: comparison of unsigned expression < 0 is always false +KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare) +# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the +# source of a reference will be _MergedGlobals and not on of the whitelisted names. +# See modpost pattern 2 +KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) +KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior) +KBUILD_CFLAGS += $(call cc-option, -no-integrated-as) +KBUILD_AFLAGS += $(call cc-option, -no-integrated-as) +else + +# These warnings generated too much noise in a regular build. +# Use make W=1 to enable them (see scripts/Makefile.extrawarn) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) +endif + ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls else @@ -789,6 +792,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) +# Make sure -fstack-check isn't enabled (like gentoo apparently did) +KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,) + # conserve stack if available KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild index b15bf6bc0e94f46f035e8781ffa921060341fe91..14a2e9af97e9992d87821e8f11276ecfef8e57cf 100644 --- a/arch/alpha/include/uapi/asm/Kbuild +++ b/arch/alpha/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += bpf_perf_event.h diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 4e6e9f57e790ac0cefcade55664124c9351e830e..dc91c663bcc02e2cdc116f40ad31757d711ac485 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -35,6 +35,14 @@ reg = <0x80 0x10>, <0x100 0x10>; #clock-cells = <0>; clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 90MHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <90000000>; }; core_intc: archs-intc@cpu { diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 63954a8b0100ebf5746394fedb62b4825048d903..69ff4895f2ba4b558f2bdfed547ef0ec27288174 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -35,6 +35,14 @@ reg = <0x80 0x10>, <0x100 0x10>; #clock-cells = <0>; clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 100MHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <100000000>; }; core_intc: archs-intc@cpu { diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 8f627c200d609148c55731aac99b2a353e72f126..006aa3de5348f31c7462f52f173ad2e74434d062 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -114,6 +114,14 @@ reg = <0x00 0x10>, <0x14B8 0x4>; #clock-cells = <0>; clocks = <&input_clk>; + + /* + * Set initial core pll output frequency to 1GHz. + * It will be applied at the core pll driver probing + * on early boot. + */ + assigned-clocks = <&core_clk>; + assigned-clock-rates = <1000000000>; }; serial: serial@5000 { diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 7b8f8faf8a24315d3379d189cab69506539e04a8..ac6b0ed8341eeff9afdef2544fa48aaac5641deb 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -49,10 +49,11 @@ CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set +CONFIG_DRM=y +# CONFIG_DRM_FBDEV_EMULATION is not set +CONFIG_DRM_UDL=y CONFIG_FB=y -CONFIG_FB_UDL=y CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_HCD=y diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index f35974ee7264a1e13c3fb82b19dc2cc84b9fcc99..c9173c02081c0c3c81e136e3ae226562f5505b8e 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -668,6 +668,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) return 0; __asm__ __volatile__( + " mov lp_count, %5 \n" " lp 3f \n" "1: ldb.ab %3, [%2, 1] \n" " breq.d %3, 0, 3f \n" @@ -684,8 +685,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count) " .word 1b, 4b \n" " .previous \n" : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) - : "g"(-EFAULT), "l"(count) - : "memory"); + : "g"(-EFAULT), "r"(count) + : "lp_count", "lp_start", "lp_end", "memory"); return res; } diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild index fa6d0ff4ff894be699616eefad77cd6a2347a3b7..170b5db64afeb7f74fb8279887a7cb75e0205c7b 100644 --- a/arch/arc/include/uapi/asm/Kbuild +++ b/arch/arc/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 7ef7d9a8ff89231811e73a241a3a3c6d248e720b..9d27331fe69a0eb441b34e51324138ef375070b0 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -199,7 +199,7 @@ static void read_arc_build_cfg_regs(void) unsigned int exec_ctrl; READ_BCR(AUX_EXEC_CTRL, exec_ctrl); - cpu->extn.dual_enb = exec_ctrl & 1; + cpu->extn.dual_enb = !(exec_ctrl & 1); /* dual issue always present for this core */ cpu->extn.dual = 1; diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 74315f302971b1989b305b5c8a3ad14517a97cc1..bf40e06f3fb84fec42cefe779d92b18829524af6 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -163,7 +163,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, */ static int __print_sym(unsigned int address, void *unused) { - __print_symbol(" %s\n", address); + printk(" %pS\n", (void *)address); return 0; } diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index bcd7c9fc5d0fc9868c04ff98d08b2e1492d50bb8..133a4dae41fe7d73b07b0d2365a899b943bc4779 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -83,6 +83,7 @@ DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC) DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR) DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT) DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN) +DO_ERROR_INFO(SIGSEGV, "gcc generated __builtin_trap", do_trap5_error, 0) /* * Entry Point for Misaligned Data access Exception, for emulating in software @@ -115,6 +116,8 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs) * Thus TRAP_S can be used for specific purpose * -1 used for software breakpointing (gdb) * -2 used by kprobes + * -5 __builtin_trap() generated by gcc (2018.03 onwards) for toggle such as + * -fno-isolate-erroneous-paths-dereference */ void do_non_swi_trap(unsigned long address, struct pt_regs *regs) { @@ -134,6 +137,9 @@ void do_non_swi_trap(unsigned long address, struct pt_regs *regs) kgdb_trap(regs); break; + case 5: + do_trap5_error(address, regs); + break; default: break; } @@ -155,3 +161,11 @@ void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs) insterror_is_error(address, regs); } + +/* + * abort() call generated by older gcc for __builtin_trap() + */ +void abort(void) +{ + __asm__ __volatile__("trap_s 5\n"); +} diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 7d8c1d6c2f60f918e95f2cafc81c90c028684b9a..6e9a0a9a6a04e1794ca0bbe6ec75dc5ab0617352 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -163,6 +163,9 @@ static void show_ecr_verbose(struct pt_regs *regs) else pr_cont("Bus Error, check PRM\n"); #endif + } else if (vec == ECR_V_TRAP) { + if (regs->ecr_param == 5) + pr_cont("gcc generated __builtin_trap\n"); } else { pr_cont("Check Programmer's Manual\n"); } diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index f1ac6790da5fe64782b59b720bf3ea80d999bff1..46544e88492d5273de4c6a121538fd4a6418ec65 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -317,25 +317,23 @@ static void __init axs103_early_init(void) * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack * of fudging the freq in DT */ +#define AXS103_QUAD_CORE_CPU_FREQ_HZ 50000000 + unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F; if (num_cores > 2) { - u32 freq = 50, orig; - /* - * TODO: use cpu node "cpu-freq" param instead of platform-specific - * "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu. - */ + u32 freq; int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk"); const struct fdt_property *prop; prop = fdt_get_property(initial_boot_params, off, - "clock-frequency", NULL); - orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000; + "assigned-clock-rates", NULL); + freq = be32_to_cpu(*(u32 *)(prop->data)); /* Patching .dtb in-place with new core clock value */ - if (freq != orig ) { - freq = cpu_to_be32(freq * 1000000); + if (freq != AXS103_QUAD_CORE_CPU_FREQ_HZ) { + freq = cpu_to_be32(AXS103_QUAD_CORE_CPU_FREQ_HZ); fdt_setprop_inplace(initial_boot_params, off, - "clock-frequency", &freq, sizeof(freq)); + "assigned-clock-rates", &freq, sizeof(freq)); } } #endif diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index fd0ae5e38639a8756c86d7882c6e74c88f0e07e5..2958aedb649ab183edcce1ca858006f67fd8ff21 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -38,42 +38,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu) #define CREG_PAE (CREG_BASE + 0x180) #define CREG_PAE_UPDATE (CREG_BASE + 0x194) -#define CREG_CORE_IF_CLK_DIV (CREG_BASE + 0x4B8) -#define CREG_CORE_IF_CLK_DIV_2 0x1 -#define CGU_BASE ARC_PERIPHERAL_BASE -#define CGU_PLL_STATUS (ARC_PERIPHERAL_BASE + 0x4) -#define CGU_PLL_CTRL (ARC_PERIPHERAL_BASE + 0x0) -#define CGU_PLL_STATUS_LOCK BIT(0) -#define CGU_PLL_STATUS_ERR BIT(1) -#define CGU_PLL_CTRL_1GHZ 0x3A10 -#define HSDK_PLL_LOCK_TIMEOUT 500 - -#define HSDK_PLL_LOCKED() \ - !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_LOCK) - -#define HSDK_PLL_ERR() \ - !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_ERR) - -static void __init hsdk_set_cpu_freq_1ghz(void) -{ - u32 timeout = HSDK_PLL_LOCK_TIMEOUT; - - /* - * As we set cpu clock which exceeds 500MHz, the divider for the interface - * clock must be programmed to div-by-2. - */ - iowrite32(CREG_CORE_IF_CLK_DIV_2, (void __iomem *) CREG_CORE_IF_CLK_DIV); - - /* Set cpu clock to 1GHz */ - iowrite32(CGU_PLL_CTRL_1GHZ, (void __iomem *) CGU_PLL_CTRL); - - while (!HSDK_PLL_LOCKED() && timeout--) - cpu_relax(); - - if (!HSDK_PLL_LOCKED() || HSDK_PLL_ERR()) - pr_err("Failed to setup CPU frequency to 1GHz!"); -} - #define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) #define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) #define SDIO_UHS_REG_EXT_DIV_2 (2 << 30) @@ -98,12 +62,6 @@ static void __init hsdk_init_early(void) * minimum possible div-by-2. */ iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT); - - /* - * Setup CPU frequency to 1GHz. - * TODO: remove it after smart hsdk pll driver will be introduced. - */ - hsdk_set_cpu_freq_1ghz(); } static const char *hsdk_compat[] __initconst = { diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 1b81c4e757727d2abcc798fefaaa55e0b0fecfec..d37f95025807708a9c486d1e6f6365f9960286b5 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -630,6 +630,7 @@ reg-names = "phy"; status = "disabled"; ti,ctrl_mod = <&usb_ctrl_mod>; + #phy-cells = <0>; }; usb0: usb@47401000 { @@ -678,6 +679,7 @@ reg-names = "phy"; status = "disabled"; ti,ctrl_mod = <&usb_ctrl_mod>; + #phy-cells = <0>; }; usb1: usb@47401800 { diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index e5b061469bf88a234ef2367ecb94a143afca793f..4714a59fd86df05a8715ee400f18e6a71041eac7 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -927,7 +927,8 @@ reg = <0x48038000 0x2000>, <0x46000000 0x400000>; reg-names = "mpu", "dat"; - interrupts = <80>, <81>; + interrupts = , + ; interrupt-names = "tx", "rx"; status = "disabled"; dmas = <&edma 8 2>, @@ -941,7 +942,8 @@ reg = <0x4803C000 0x2000>, <0x46400000 0x400000>; reg-names = "mpu", "dat"; - interrupts = <82>, <83>; + interrupts = , + ; interrupt-names = "tx", "rx"; status = "disabled"; dmas = <&edma 10 2>, diff --git a/arch/arm/boot/dts/am437x-cm-t43.dts b/arch/arm/boot/dts/am437x-cm-t43.dts index 9e92d480576b04ae89f3e5aefc7c57f3a522070c..3b9a94c274a7b01ed29ead402a4b3538c122138d 100644 --- a/arch/arm/boot/dts/am437x-cm-t43.dts +++ b/arch/arm/boot/dts/am437x-cm-t43.dts @@ -301,8 +301,8 @@ status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&spi0_pins>; - dmas = <&edma 16 - &edma 17>; + dmas = <&edma 16 0 + &edma 17 0>; dma-names = "tx0", "rx0"; flash: w25q64cvzpig@0 { diff --git a/arch/arm/boot/dts/armada-385-db-ap.dts b/arch/arm/boot/dts/armada-385-db-ap.dts index 25d2d720dc0e2cacc98b402e609e0fe78eb01acb..678aa023335d885279d6ba3bcbc67f774c3b1408 100644 --- a/arch/arm/boot/dts/armada-385-db-ap.dts +++ b/arch/arm/boot/dts/armada-385-db-ap.dts @@ -236,6 +236,7 @@ usb3_phy: usb3_phy { compatible = "usb-nop-xceiv"; vcc-supply = <®_xhci0_vbus>; + #phy-cells = <0>; }; reg_xhci0_vbus: xhci0-vbus { diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi index e1f355ffc8f7e07cbbc3912b13e7436f9ac4f407..434dc9aaa5e4e2b46429e221201bf7839ae00a80 100644 --- a/arch/arm/boot/dts/armada-385-linksys.dtsi +++ b/arch/arm/boot/dts/armada-385-linksys.dtsi @@ -66,6 +66,7 @@ usb3_1_phy: usb3_1-phy { compatible = "usb-nop-xceiv"; vcc-supply = <&usb3_1_vbus>; + #phy-cells = <0>; }; usb3_1_vbus: usb3_1-vbus { diff --git a/arch/arm/boot/dts/armada-385-synology-ds116.dts b/arch/arm/boot/dts/armada-385-synology-ds116.dts index 36ad571e76f31c85aff97acc42d7991ddf184e50..0a3552ebda3b80c84588a3cdfd45fb1ae9286fd2 100644 --- a/arch/arm/boot/dts/armada-385-synology-ds116.dts +++ b/arch/arm/boot/dts/armada-385-synology-ds116.dts @@ -191,11 +191,13 @@ usb3_0_phy: usb3_0_phy { compatible = "usb-nop-xceiv"; vcc-supply = <®_usb3_0_vbus>; + #phy-cells = <0>; }; usb3_1_phy: usb3_1_phy { compatible = "usb-nop-xceiv"; vcc-supply = <®_usb3_1_vbus>; + #phy-cells = <0>; }; reg_usb3_0_vbus: usb3-vbus0 { diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts index f503955dbd3b810db157344e3f9cf48dadd4b1ca..51b4ee6df130188cb0fe839ce78aa5f84d5e3607 100644 --- a/arch/arm/boot/dts/armada-388-gp.dts +++ b/arch/arm/boot/dts/armada-388-gp.dts @@ -276,11 +276,13 @@ usb2_1_phy: usb2_1_phy { compatible = "usb-nop-xceiv"; vcc-supply = <®_usb2_1_vbus>; + #phy-cells = <0>; }; usb3_phy: usb3_phy { compatible = "usb-nop-xceiv"; vcc-supply = <®_usb3_vbus>; + #phy-cells = <0>; }; reg_usb3_vbus: usb3-vbus { diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi index 45d815a86d420b9f8f108919bb846f865f9b5231..de08d9045cb85bd83884b8a7c05d78296034a94b 100644 --- a/arch/arm/boot/dts/aspeed-g4.dtsi +++ b/arch/arm/boot/dts/aspeed-g4.dtsi @@ -219,7 +219,7 @@ compatible = "aspeed,ast2400-vuart"; reg = <0x1e787000 0x40>; reg-shift = <2>; - interrupts = <10>; + interrupts = <8>; clocks = <&clk_uart>; no-loopback-test; status = "disabled"; diff --git a/arch/arm/boot/dts/at91-tse850-3.dts b/arch/arm/boot/dts/at91-tse850-3.dts index 5f29010cdbd8129d57bd2141164f4fdd4e58cc5b..9b82cc8843e1af8df22ee12d4146cd87c725c563 100644 --- a/arch/arm/boot/dts/at91-tse850-3.dts +++ b/arch/arm/boot/dts/at91-tse850-3.dts @@ -221,6 +221,7 @@ jc42@18 { compatible = "nxp,se97b", "jedec,jc-42.4-temp"; reg = <0x18>; + smbus-timeout-disable; }; dpot: mcp4651-104@28 { diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index 528b9e3bc1da146fd188b4e2c93ec75b7abb4fdd..dcc55aa84583cdd18f7ef6ecd780eb947be1ef1f 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -85,7 +85,7 @@ timer@20200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x20200 0x100>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; }; @@ -93,7 +93,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x20600 0x20>; interrupts = ; + IRQ_TYPE_EDGE_RISING)>; clocks = <&periph_clk>; }; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 013431e3d7c3140d3a0645bdf4f130e9a860f984..dcde93c85c2d38e0f230ca21c700cb7755d9ec55 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -639,5 +639,6 @@ usbphy: phy { compatible = "usb-nop-xceiv"; + #phy-cells = <0>; }; }; diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts index 3bc50849d013ff0442f559dce24274972202f5f2..b8bde13de90a571ea71aeec0515c7c8290ecb7ca 100644 --- a/arch/arm/boot/dts/bcm958623hr.dts +++ b/arch/arm/boot/dts/bcm958623hr.dts @@ -141,10 +141,6 @@ status = "okay"; }; -&sata { - status = "okay"; -}; - &qspi { bspi-sel = <0>; flash: m25p80@0 { diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts index d94d14b3c745a0d01c031b6d9b9e6426e0e7b2ec..6a44b8021702176c63d09e55925ffd3d7e02994e 100644 --- a/arch/arm/boot/dts/bcm958625hr.dts +++ b/arch/arm/boot/dts/bcm958625hr.dts @@ -177,10 +177,6 @@ status = "okay"; }; -&sata { - status = "okay"; -}; - &srab { compatible = "brcm,bcm58625-srab", "brcm,nsp-srab"; status = "okay"; diff --git a/arch/arm/boot/dts/da850-lego-ev3.dts b/arch/arm/boot/dts/da850-lego-ev3.dts index 413dbd5d9f6442b8dae4b7f132b35e5f816cf798..81942ae83e1f9c9f717dd0fd565a28966c44c1da 100644 --- a/arch/arm/boot/dts/da850-lego-ev3.dts +++ b/arch/arm/boot/dts/da850-lego-ev3.dts @@ -178,7 +178,7 @@ */ battery { pinctrl-names = "default"; - pintctrl-0 = <&battery_pins>; + pinctrl-0 = <&battery_pins>; compatible = "lego,ev3-battery"; io-channels = <&adc 4>, <&adc 3>; io-channel-names = "voltage", "current"; @@ -392,7 +392,7 @@ batt_volt_en { gpio-hog; gpios = <6 GPIO_ACTIVE_HIGH>; - output-low; + output-high; }; }; diff --git a/arch/arm/boot/dts/dm814x.dtsi b/arch/arm/boot/dts/dm814x.dtsi index 9708157f5daf6f7c84a76e521aae8d27e5684d68..681f5487406e39f7bdad2c793754149c0ab189d3 100644 --- a/arch/arm/boot/dts/dm814x.dtsi +++ b/arch/arm/boot/dts/dm814x.dtsi @@ -75,6 +75,7 @@ reg = <0x47401300 0x100>; reg-names = "phy"; ti,ctrl_mod = <&usb_ctrl_mod>; + #phy-cells = <0>; }; usb0: usb@47401000 { @@ -385,6 +386,7 @@ reg = <0x1b00 0x100>; reg-names = "phy"; ti,ctrl_mod = <&usb_ctrl_mod>; + #phy-cells = <0>; }; }; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index b2b95ff205e81ba9248f3f29416001459e3c3d77..0029ec27819ca361024bfdaccb43b1faededa600 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -664,6 +664,10 @@ status = "okay"; }; +&mixer { + status = "okay"; +}; + /* eMMC flash */ &mmc_0 { status = "okay"; diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi index 589a67c5f7969fb15b59e1910ea96db528dbf9e4..84f17f7abb7136104cf2265bb34c7e0bba0ebd12 100644 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@ -433,15 +433,6 @@ clock-names = "ipg", "per"; }; - srtc: srtc@53fa4000 { - compatible = "fsl,imx53-rtc", "fsl,imx25-rtc"; - reg = <0x53fa4000 0x4000>; - interrupts = <24>; - interrupt-parent = <&tzic>; - clocks = <&clks IMX5_CLK_SRTC_GATE>; - clock-names = "ipg"; - }; - iomuxc: iomuxc@53fa8000 { compatible = "fsl,imx53-iomuxc"; reg = <0x53fa8000 0x4000>; diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts index 38faa90007d7f0c7e3042a90aef486321cc4bfa0..2fa5eb4bd4029facce1217e34f266b02daeb987f 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts @@ -72,7 +72,8 @@ }; &gpmc { - ranges = <1 0 0x08000000 0x1000000>; /* CS1: 16MB for LAN9221 */ + ranges = <0 0 0x30000000 0x1000000 /* CS0: 16MB for NAND */ + 1 0 0x2c000000 0x1000000>; /* CS1: 16MB for LAN9221 */ ethernet@gpmc { pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index 26cce4d18405d5c993377ed7a246d7c80b43dcea..29cb804d10cc7d8ad9c37b13296d31bdf0f83ffe 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -33,11 +33,12 @@ hsusb2_phy: hsusb2_phy { compatible = "usb-nop-xceiv"; reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; /* gpio_4 */ + #phy-cells = <0>; }; }; &gpmc { - ranges = <0 0 0x00000000 0x1000000>; /* CS0: 16MB for NAND */ + ranges = <0 0 0x30000000 0x1000000>; /* CS0: 16MB for NAND */ nand@0,0 { compatible = "ti,omap2-nand"; @@ -121,7 +122,7 @@ &mmc3 { interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>; - pinctrl-0 = <&mmc3_pins>; + pinctrl-0 = <&mmc3_pins &wl127x_gpio>; pinctrl-names = "default"; vmmc-supply = <&wl12xx_vmmc>; non-removable; @@ -132,8 +133,8 @@ wlcore: wlcore@2 { compatible = "ti,wl1273"; reg = <2>; - interrupt-parent = <&gpio5>; - interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; /* gpio 152 */ + interrupt-parent = <&gpio1>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; /* gpio 2 */ ref-clock-frequency = <26000000>; }; }; @@ -157,8 +158,6 @@ OMAP3_CORE1_IOPAD(0x2166, PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc2_dat5.sdmmc3_dat1 */ OMAP3_CORE1_IOPAD(0x2168, PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc2_dat6.sdmmc3_dat2 */ OMAP3_CORE1_IOPAD(0x216a, PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc2_dat6.sdmmc3_dat3 */ - OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT_PULLUP | MUX_MODE4) /* mcbsp4_clkx.gpio_152 */ - OMAP3_CORE1_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */ OMAP3_CORE1_IOPAD(0x21d0, PIN_INPUT_PULLUP | MUX_MODE3) /* mcspi1_cs1.sdmmc3_cmd */ OMAP3_CORE1_IOPAD(0x21d2, PIN_INPUT_PULLUP | MUX_MODE3) /* mcspi1_cs2.sdmmc_clk */ >; @@ -228,6 +227,12 @@ OMAP3_WKUP_IOPAD(0x2a0e, PIN_OUTPUT | MUX_MODE4) /* sys_boot2.gpio_4 */ >; }; + wl127x_gpio: pinmux_wl127x_gpio_pin { + pinctrl-single,pins = < + OMAP3_WKUP_IOPAD(0x2a0c, PIN_INPUT | MUX_MODE4) /* sys_boot0.gpio_2 */ + OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */ + >; + }; }; &omap3_pmx_core2 { diff --git a/arch/arm/boot/dts/ls1021a-qds.dts b/arch/arm/boot/dts/ls1021a-qds.dts index 940875316d0f3926b39ea0717e560288f8594084..67b4de0e343921fda48d7e94b228732fe2d0131d 100644 --- a/arch/arm/boot/dts/ls1021a-qds.dts +++ b/arch/arm/boot/dts/ls1021a-qds.dts @@ -215,7 +215,7 @@ reg = <0x2a>; VDDA-supply = <®_3p3v>; VDDIO-supply = <®_3p3v>; - clocks = <&sys_mclk 1>; + clocks = <&sys_mclk>; }; }; }; diff --git a/arch/arm/boot/dts/ls1021a-twr.dts b/arch/arm/boot/dts/ls1021a-twr.dts index a8b148ad1dd2c37076a6a89d7c02498064fabc15..44715c8ef756b9f44bdc7a7c69b0cdc12c26d93d 100644 --- a/arch/arm/boot/dts/ls1021a-twr.dts +++ b/arch/arm/boot/dts/ls1021a-twr.dts @@ -187,7 +187,7 @@ reg = <0x0a>; VDDA-supply = <®_3p3v>; VDDIO-supply = <®_3p3v>; - clocks = <&sys_mclk 1>; + clocks = <&sys_mclk>; }; }; diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi index 4926133077b3541165ccf989af0834a8362411b3..0d9faf1a51eac0cca63b54cb272bb1c57e8261d2 100644 --- a/arch/arm/boot/dts/meson.dtsi +++ b/arch/arm/boot/dts/meson.dtsi @@ -85,15 +85,6 @@ reg = <0x7c00 0x200>; }; - gpio_intc: interrupt-controller@9880 { - compatible = "amlogic,meson-gpio-intc"; - reg = <0xc1109880 0x10>; - interrupt-controller; - #interrupt-cells = <2>; - amlogic,channel-interrupts = <64 65 66 67 68 69 70 71>; - status = "disabled"; - }; - hwrng: rng@8100 { compatible = "amlogic,meson-rng"; reg = <0x8100 0x8>; @@ -191,6 +182,15 @@ status = "disabled"; }; + gpio_intc: interrupt-controller@9880 { + compatible = "amlogic,meson-gpio-intc"; + reg = <0x9880 0x10>; + interrupt-controller; + #interrupt-cells = <2>; + amlogic,channel-interrupts = <64 65 66 67 68 69 70 71>; + status = "disabled"; + }; + wdt: watchdog@9900 { compatible = "amlogic,meson6-wdt"; reg = <0x9900 0x8>; diff --git a/arch/arm/boot/dts/nspire.dtsi b/arch/arm/boot/dts/nspire.dtsi index ec2283b1a638e028d28e57d53e8a7dd9fc2c2778..1a5ae4cd107f08294c4bddce41d7369a76fa9ecd 100644 --- a/arch/arm/boot/dts/nspire.dtsi +++ b/arch/arm/boot/dts/nspire.dtsi @@ -56,6 +56,7 @@ usb_phy: usb_phy { compatible = "usb-nop-xceiv"; + #phy-cells = <0>; }; vbus_reg: vbus_reg { diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts index 683b96a8f73e01248349440d1caab8fd7ffeb7cc..0349fcc9dc26ab8d80f568b1d5c8528a78a8a588 100644 --- a/arch/arm/boot/dts/omap3-beagle-xm.dts +++ b/arch/arm/boot/dts/omap3-beagle-xm.dts @@ -90,6 +90,7 @@ compatible = "usb-nop-xceiv"; reset-gpios = <&gpio5 19 GPIO_ACTIVE_LOW>; /* gpio_147 */ vcc-supply = <&hsusb2_power>; + #phy-cells = <0>; }; tfp410: encoder0 { diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts index 4d2eaf843fa960190514dffb06b24d46056adb3e..3ca8991a6c3e977e2a16bb30444db4478a61ca0d 100644 --- a/arch/arm/boot/dts/omap3-beagle.dts +++ b/arch/arm/boot/dts/omap3-beagle.dts @@ -64,6 +64,7 @@ compatible = "usb-nop-xceiv"; reset-gpios = <&gpio5 19 GPIO_ACTIVE_LOW>; /* gpio_147 */ vcc-supply = <&hsusb2_power>; + #phy-cells = <0>; }; sound { diff --git a/arch/arm/boot/dts/omap3-cm-t3x.dtsi b/arch/arm/boot/dts/omap3-cm-t3x.dtsi index 31d5ebf38892e77d04fc53fcd37922ffaca282e2..ab6003fe5a4341ded0b7668861e7ff1553a9ef57 100644 --- a/arch/arm/boot/dts/omap3-cm-t3x.dtsi +++ b/arch/arm/boot/dts/omap3-cm-t3x.dtsi @@ -43,12 +43,14 @@ hsusb1_phy: hsusb1_phy { compatible = "usb-nop-xceiv"; vcc-supply = <&hsusb1_power>; + #phy-cells = <0>; }; /* HS USB Host PHY on PORT 2 */ hsusb2_phy: hsusb2_phy { compatible = "usb-nop-xceiv"; vcc-supply = <&hsusb2_power>; + #phy-cells = <0>; }; ads7846reg: ads7846-reg { diff --git a/arch/arm/boot/dts/omap3-evm-common.dtsi b/arch/arm/boot/dts/omap3-evm-common.dtsi index dbc3f030a16c00cf8baf4e75cbd53fa5d21dc2a0..ee64191e41ca187b60eda4e36dc7aeb641c34896 100644 --- a/arch/arm/boot/dts/omap3-evm-common.dtsi +++ b/arch/arm/boot/dts/omap3-evm-common.dtsi @@ -29,6 +29,7 @@ compatible = "usb-nop-xceiv"; reset-gpios = <&gpio1 21 GPIO_ACTIVE_LOW>; /* gpio_21 */ vcc-supply = <&hsusb2_power>; + #phy-cells = <0>; }; leds { diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index 4504908c23fe991ef2805548550ea4b091977522..3dc56fb156b7e88d9503cfea220a3c9067b326c2 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -120,6 +120,7 @@ hsusb2_phy: hsusb2_phy { compatible = "usb-nop-xceiv"; reset-gpios = <&gpio6 14 GPIO_ACTIVE_LOW>; + #phy-cells = <0>; }; tv0: connector { diff --git a/arch/arm/boot/dts/omap3-igep0020-common.dtsi b/arch/arm/boot/dts/omap3-igep0020-common.dtsi index 667f96245729cd7ef76c2a27dbce6fa2f6410f52..ecbec23af49f74a6ecccdd0ec99fc434422c9de3 100644 --- a/arch/arm/boot/dts/omap3-igep0020-common.dtsi +++ b/arch/arm/boot/dts/omap3-igep0020-common.dtsi @@ -58,6 +58,7 @@ compatible = "usb-nop-xceiv"; reset-gpios = <&gpio1 24 GPIO_ACTIVE_LOW>; /* gpio_24 */ vcc-supply = <&hsusb1_power>; + #phy-cells = <0>; }; tfp410: encoder { diff --git a/arch/arm/boot/dts/omap3-igep0030-common.dtsi b/arch/arm/boot/dts/omap3-igep0030-common.dtsi index e94d9427450cafa9457be8f5b50d8bfa9a07d8b5..443f717074374274f7d82ffc385cb47e3d9cf8e7 100644 --- a/arch/arm/boot/dts/omap3-igep0030-common.dtsi +++ b/arch/arm/boot/dts/omap3-igep0030-common.dtsi @@ -37,6 +37,7 @@ hsusb2_phy: hsusb2_phy { compatible = "usb-nop-xceiv"; reset-gpios = <&gpio2 22 GPIO_ACTIVE_LOW>; /* gpio_54 */ + #phy-cells = <0>; }; }; diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi index 343a36d8031d8a2207d9b411ae71000b50232fa0..7ada1e93e166389eb4472cc39d61003b4cc68360 100644 --- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi +++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi @@ -51,6 +51,7 @@ hsusb1_phy: hsusb1_phy { compatible = "usb-nop-xceiv"; vcc-supply = <®_vcc3>; + #phy-cells = <0>; }; }; diff --git a/arch/arm/boot/dts/omap3-overo-base.dtsi b/arch/arm/boot/dts/omap3-overo-base.dtsi index f25e158e7163b23201ca8e643f557b3965fc959d..ac141fcd1742e3e29a79c4eaacbf69884b171718 100644 --- a/arch/arm/boot/dts/omap3-overo-base.dtsi +++ b/arch/arm/boot/dts/omap3-overo-base.dtsi @@ -51,6 +51,7 @@ compatible = "usb-nop-xceiv"; reset-gpios = <&gpio6 23 GPIO_ACTIVE_LOW>; /* gpio_183 */ vcc-supply = <&hsusb2_power>; + #phy-cells = <0>; }; /* Regulator to trigger the nPoweron signal of the Wifi module */ diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi index 53e007abdc7159ee69fd35a120ba30edb0704e3c..cd53dc6c00516b4d9be3aa5f3023a7ae1a3ffb48 100644 --- a/arch/arm/boot/dts/omap3-pandora-common.dtsi +++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi @@ -205,6 +205,7 @@ compatible = "usb-nop-xceiv"; reset-gpios = <&gpio1 16 GPIO_ACTIVE_LOW>; /* GPIO_16 */ vcc-supply = <&vaux2>; + #phy-cells = <0>; }; /* HS USB Host VBUS supply diff --git a/arch/arm/boot/dts/omap3-tao3530.dtsi b/arch/arm/boot/dts/omap3-tao3530.dtsi index 9a601d15247bef5da1519db88b1680b119381b6d..6f5bd027b71753c8508acc93a56f211a95cd12be 100644 --- a/arch/arm/boot/dts/omap3-tao3530.dtsi +++ b/arch/arm/boot/dts/omap3-tao3530.dtsi @@ -46,6 +46,7 @@ compatible = "usb-nop-xceiv"; reset-gpios = <&gpio6 2 GPIO_ACTIVE_LOW>; /* gpio_162 */ vcc-supply = <&hsusb2_power>; + #phy-cells = <0>; }; sound { diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index 90b5c7148feb5a6c20763741628fd1e4c41e3909..bb33935df7b057eef7a9b237fc2d2f951f6ab375 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -715,6 +715,7 @@ compatible = "ti,ohci-omap3"; reg = <0x48064400 0x400>; interrupts = <76>; + remote-wakeup-connected; }; usbhsehci: ehci@48064800 { diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts index 8b93d37310f28ba4c10ad81d35f8419914ef4633..24a463f8641fe5b968cdb5d62bf15ed4e100b128 100644 --- a/arch/arm/boot/dts/omap4-droid4-xt894.dts +++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts @@ -73,6 +73,7 @@ /* HS USB Host PHY on PORT 1 */ hsusb1_phy: hsusb1_phy { compatible = "usb-nop-xceiv"; + #phy-cells = <0>; }; /* LCD regulator from sw5 source */ diff --git a/arch/arm/boot/dts/omap4-duovero.dtsi b/arch/arm/boot/dts/omap4-duovero.dtsi index 6e6810c258eb29c15ff8b4e87a6ce1a6eac93f5e..eb123b24c8e330141b7ece42c6dea74f0cc2a837 100644 --- a/arch/arm/boot/dts/omap4-duovero.dtsi +++ b/arch/arm/boot/dts/omap4-duovero.dtsi @@ -43,6 +43,7 @@ hsusb1_phy: hsusb1_phy { compatible = "usb-nop-xceiv"; reset-gpios = <&gpio2 30 GPIO_ACTIVE_LOW>; /* gpio_62 */ + #phy-cells = <0>; pinctrl-names = "default"; pinctrl-0 = <&hsusb1phy_pins>; diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi index 22c1eee9b07a28e27cd1a2d66e89100065a3ce7b..5501d1b4e6cdfa152804b7cbb5b277fcdc0417f6 100644 --- a/arch/arm/boot/dts/omap4-panda-common.dtsi +++ b/arch/arm/boot/dts/omap4-panda-common.dtsi @@ -89,6 +89,7 @@ hsusb1_phy: hsusb1_phy { compatible = "usb-nop-xceiv"; reset-gpios = <&gpio2 30 GPIO_ACTIVE_LOW>; /* gpio_62 */ + #phy-cells = <0>; vcc-supply = <&hsusb1_power>; clocks = <&auxclk3_ck>; clock-names = "main_clk"; diff --git a/arch/arm/boot/dts/omap4-var-som-om44.dtsi b/arch/arm/boot/dts/omap4-var-som-om44.dtsi index 6500bfc8d1309a26198893583bb00861d5a69a1e..10fce28ceb5b7dc38477e60f0d87a86b6a384638 100644 --- a/arch/arm/boot/dts/omap4-var-som-om44.dtsi +++ b/arch/arm/boot/dts/omap4-var-som-om44.dtsi @@ -44,6 +44,7 @@ reset-gpios = <&gpio6 17 GPIO_ACTIVE_LOW>; /* gpio 177 */ vcc-supply = <&vbat>; + #phy-cells = <0>; clocks = <&auxclk3_ck>; clock-names = "main_clk"; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 1dc5a76b3c7106c9532f47c71c8052813b2492f9..cc1a07a3620ff5daa9c32770fee918c45e9439cd 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -398,7 +398,7 @@ elm: elm@48078000 { compatible = "ti,am3352-elm"; reg = <0x48078000 0x2000>; - interrupts = <4>; + interrupts = ; ti,hwmods = "elm"; status = "disabled"; }; @@ -1081,14 +1081,13 @@ usbhsohci: ohci@4a064800 { compatible = "ti,ohci-omap3"; reg = <0x4a064800 0x400>; - interrupt-parent = <&gic>; interrupts = ; + remote-wakeup-connected; }; usbhsehci: ehci@4a064c00 { compatible = "ti,ehci-omap"; reg = <0x4a064c00 0x400>; - interrupt-parent = <&gic>; interrupts = ; }; }; diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 575ecffb0e9e47cfda8373c22071de12e8789302..1b20838bb9a42ed64b80b0943dd59acf8be0d06d 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -73,12 +73,14 @@ clocks = <&auxclk1_ck>; clock-names = "main_clk"; clock-frequency = <19200000>; + #phy-cells = <0>; }; /* HS USB Host PHY on PORT 3 */ hsusb3_phy: hsusb3_phy { compatible = "usb-nop-xceiv"; reset-gpios = <&gpio3 15 GPIO_ACTIVE_LOW>; /* gpio3_79 ETH_NRESET */ + #phy-cells = <0>; }; tpd12s015: encoder { diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts index 5b172a04b6f1b14193f280228432adf19b00296d..5e21fb430a65daa8e29a1ca90a404389d9dc99a9 100644 --- a/arch/arm/boot/dts/omap5-cm-t54.dts +++ b/arch/arm/boot/dts/omap5-cm-t54.dts @@ -63,12 +63,14 @@ hsusb2_phy: hsusb2_phy { compatible = "usb-nop-xceiv"; reset-gpios = <&gpio3 12 GPIO_ACTIVE_LOW>; /* gpio3_76 HUB_RESET */ + #phy-cells = <0>; }; /* HS USB Host PHY on PORT 3 */ hsusb3_phy: hsusb3_phy { compatible = "usb-nop-xceiv"; reset-gpios = <&gpio3 19 GPIO_ACTIVE_LOW>; /* gpio3_83 ETH_RESET */ + #phy-cells = <0>; }; leds { diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 4cd0005e462f7a0b88577a578d0baff8f2b13714..51a7fb3d7b9a019f5b76d5f72afaa0c0c857e050 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -940,6 +940,7 @@ compatible = "ti,ohci-omap3"; reg = <0x4a064800 0x400>; interrupts = ; + remote-wakeup-connected; }; usbhsehci: ehci@4a064c00 { diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index 2f017fee4009a2242c3a808c5aa07e29eac36ee9..62baabd757b6ba010693b9b7d4cfc37a367819d8 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -1201,6 +1201,7 @@ clock-names = "extal", "usb_extal"; #clock-cells = <2>; #power-domain-cells = <0>; + #reset-cells = <1>; }; prr: chipid@ff000044 { diff --git a/arch/arm/boot/dts/r8a7792.dtsi b/arch/arm/boot/dts/r8a7792.dtsi index 131f65b0426ea317ccfa4fa8fed0b015611bd48c..3d080e07374ca609d367d09642595ff11f9c6629 100644 --- a/arch/arm/boot/dts/r8a7792.dtsi +++ b/arch/arm/boot/dts/r8a7792.dtsi @@ -829,6 +829,7 @@ clock-names = "extal"; #clock-cells = <2>; #power-domain-cells = <0>; + #reset-cells = <1>; }; }; diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi index 58eae569b4e0e3f0920f0e8d0cb4a19cd752d1fd..0cd1035de1a4bd4906eafae6586a596a184a3b84 100644 --- a/arch/arm/boot/dts/r8a7793.dtsi +++ b/arch/arm/boot/dts/r8a7793.dtsi @@ -1088,6 +1088,7 @@ clock-names = "extal", "usb_extal"; #clock-cells = <2>; #power-domain-cells = <0>; + #reset-cells = <1>; }; rst: reset-controller@e6160000 { diff --git a/arch/arm/boot/dts/r8a7794.dtsi b/arch/arm/boot/dts/r8a7794.dtsi index 905e50c9b524d2c4ac40e516f8238e15f48a2a50..5643976c13569541b079dd5f9c9a4545dcf2ffc3 100644 --- a/arch/arm/boot/dts/r8a7794.dtsi +++ b/arch/arm/boot/dts/r8a7794.dtsi @@ -1099,6 +1099,7 @@ clock-names = "extal", "usb_extal"; #clock-cells = <2>; #power-domain-cells = <0>; + #reset-cells = <1>; }; rst: reset-controller@e6160000 { diff --git a/arch/arm/boot/dts/rk3066a-marsboard.dts b/arch/arm/boot/dts/rk3066a-marsboard.dts index c6d92c25df42d5a673e991df58f86f566df73679..d23ee6d911acf55cb9330b66330966c33816ac7f 100644 --- a/arch/arm/boot/dts/rk3066a-marsboard.dts +++ b/arch/arm/boot/dts/rk3066a-marsboard.dts @@ -83,6 +83,10 @@ }; }; +&cpu0 { + cpu0-supply = <&vdd_arm>; +}; + &i2c1 { status = "okay"; clock-frequency = <400000>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index cd24894ee5c6b14deab21c543bad7c62ec34c93d..6102e4e7f35c15bcb809152453ef3ed82dd579d7 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -956,7 +956,7 @@ iep_mmu: iommu@ff900800 { compatible = "rockchip,iommu"; reg = <0x0 0xff900800 0x0 0x40>; - interrupts = ; + interrupts = ; interrupt-names = "iep_mmu"; #iommu-cells = <0>; status = "disabled"; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index b91300d49a31081269789a6e7437e6d707a8a6f4..5840f5c75c3b388d47242c53aa0c0f7fbe3e3a68 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -502,8 +502,8 @@ reg = <0x01c16000 0x1000>; interrupts = <58>; clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>, - <&ccu 9>, - <&ccu 18>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "pll-0", "pll-1"; dmas = <&dma SUN4I_DMA_NORMAL 16>, <&dma SUN4I_DMA_NORMAL 16>, diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 6ae4d95e230e58a468c908d85c2cfaf8d87894a0..316cb8b2945b114224d2c75ffa65a3d6a07f3300 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -82,8 +82,8 @@ reg = <0x01c16000 0x1000>; interrupts = <58>; clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>, - <&ccu 9>, - <&ccu 16>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "pll-0", "pll-1"; dmas = <&dma SUN4I_DMA_NORMAL 16>, <&dma SUN4I_DMA_NORMAL 16>, diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index 8bfa12b548e0a2acf8f285505cd3699e27519e86..72d3fe44ecaf0d53f16b28b6433e9853d1a13c2f 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -429,8 +429,8 @@ interrupts = ; clocks = <&ccu CLK_AHB1_HDMI>, <&ccu CLK_HDMI>, <&ccu CLK_HDMI_DDC>, - <&ccu 7>, - <&ccu 13>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "ddc", "pll-0", "pll-1"; resets = <&ccu RST_AHB1_HDMI>; reset-names = "ahb"; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 68dfa82544fc4c574916c3d60c17a60097ddb2b1..59655e42e4b09a75edc0b285e2c18fd4926e21aa 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -581,8 +581,8 @@ reg = <0x01c16000 0x1000>; interrupts = ; clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>, - <&ccu 9>, - <&ccu 18>; + <&ccu CLK_PLL_VIDEO0_2X>, + <&ccu CLK_PLL_VIDEO1_2X>; clock-names = "ahb", "mod", "pll-0", "pll-1"; dmas = <&dma SUN4I_DMA_NORMAL 16>, <&dma SUN4I_DMA_NORMAL 16>, diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts index 98715538932f10bd048b355b4f7bb5bfc183a7cd..a021ee6da3968c252a02aefb29c3b99a2e13a096 100644 --- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts +++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts @@ -146,6 +146,7 @@ status = "okay"; axp81x: pmic@3a3 { + compatible = "x-powers,axp813"; reg = <0x3a3>; interrupt-parent = <&r_intc>; interrupts = <0 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm/boot/dts/tango4-common.dtsi b/arch/arm/boot/dts/tango4-common.dtsi index 0ec1b0a317b4c4631d4cfe5d9d451e9d16c697de..ff72a8efb73d05e3d219587654b96987ab1b31e5 100644 --- a/arch/arm/boot/dts/tango4-common.dtsi +++ b/arch/arm/boot/dts/tango4-common.dtsi @@ -156,7 +156,6 @@ reg = <0x6e000 0x400>; ranges = <0 0x6e000 0x400>; interrupt-parent = <&gic>; - interrupt-controller; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts index 02a6227c717ca6ce0bd515804887b74778a06ded..4b8edc8982cf156931177b0ecce0f6b9329afce3 100644 --- a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts +++ b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts @@ -121,7 +121,7 @@ switch0port10: port@10 { reg = <10>; label = "dsa"; - phy-mode = "xgmii"; + phy-mode = "xaui"; link = <&switch1port10>; }; }; @@ -208,7 +208,7 @@ switch1port10: port@10 { reg = <10>; label = "dsa"; - phy-mode = "xgmii"; + phy-mode = "xaui"; link = <&switch0port10>; }; }; @@ -359,7 +359,7 @@ }; &i2c1 { - at24mac602@0 { + at24mac602@50 { compatible = "atmel,24c02"; reg = <0x50>; read-only; diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index c8781450905be94995e0df72aeb3cc722afd1b60..3ab8b3781bfeca7264989b813209a99d115d35f2 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -161,8 +161,7 @@ #else #define VTTBR_X (5 - KVM_T0SZ) #endif -#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT _AC(48, ULL) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 242151ea69087a4ec8c4b5fd963c210ff30a89fe..a9f7d3f47134a96536480275168247e309d78388 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -285,6 +285,11 @@ static inline void kvm_arm_init_debug(void) {} static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} +static inline bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, + struct kvm_run *run) +{ + return false; +} int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild index 4d53de308ee089a7b745926ab8da16caa825806e..4d1cc1847edf076dfb3ea03db6712803a851d28b 100644 --- a/arch/arm/include/uapi/asm/Kbuild +++ b/arch/arm/include/uapi/asm/Kbuild @@ -7,6 +7,7 @@ generated-y += unistd-oabi.h generated-y += unistd-eabi.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += ioctl.h generic-y += ipcbuf.h diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 5cf04888c581df4a21053a826f8a2fdd58204435..3e26c6f7a191a9621a5234598920bb43f260bbbb 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -793,7 +793,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index 1712f132b80d2402d94d72ea974a0c3326fa2f52..b83fdc06286a64ece150fb7e419bc587e47c3e34 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -85,7 +85,11 @@ .pushsection .text.fixup,"ax" .align 4 9001: mov r4, #-EFAULT +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + ldr r5, [sp, #9*4] @ *err_ptr +#else ldr r5, [sp, #8*4] @ *err_ptr +#endif str r4, [r5] ldmia sp, {r1, r2} @ retrieve dst, len add r2, r2, r1 diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 8be04ec95adf5e4d0969c9c1cc1460e4e31c9caf..5ace9380626a0cc34f074e71a2981c8c42e87c50 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -868,10 +868,10 @@ static const struct dma_slave_map dm365_edma_map[] = { { "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 17) }, { "spi_davinci.3", "tx", EDMA_FILTER_PARAM(0, 18) }, { "spi_davinci.3", "rx", EDMA_FILTER_PARAM(0, 19) }, - { "dm6441-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) }, - { "dm6441-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) }, - { "dm6441-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) }, - { "dm6441-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) }, + { "da830-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) }, + { "da830-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) }, + { "da830-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) }, + { "da830-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) }, }; static struct edma_soc_info dm365_edma_pdata = { @@ -925,12 +925,14 @@ static struct resource edma_resources[] = { /* not using TC*_ERR */ }; -static struct platform_device dm365_edma_device = { - .name = "edma", - .id = 0, - .dev.platform_data = &dm365_edma_pdata, - .num_resources = ARRAY_SIZE(edma_resources), - .resource = edma_resources, +static const struct platform_device_info dm365_edma_device __initconst = { + .name = "edma", + .id = 0, + .dma_mask = DMA_BIT_MASK(32), + .res = edma_resources, + .num_res = ARRAY_SIZE(edma_resources), + .data = &dm365_edma_pdata, + .size_data = sizeof(dm365_edma_pdata), }; static struct resource dm365_asp_resources[] = { @@ -1428,13 +1430,18 @@ int __init dm365_init_video(struct vpfe_config *vpfe_cfg, static int __init dm365_init_devices(void) { + struct platform_device *edma_pdev; int ret = 0; if (!cpu_is_davinci_dm365()) return 0; davinci_cfg_reg(DM365_INT_EDMA_CC); - platform_device_register(&dm365_edma_device); + edma_pdev = platform_device_register_full(&dm365_edma_device); + if (IS_ERR(edma_pdev)) { + pr_warn("%s: Failed to register eDMA\n", __func__); + return PTR_ERR(edma_pdev); + } platform_device_register(&dm365_mdio_device); platform_device_register(&dm365_emac_device); diff --git a/arch/arm/mach-meson/platsmp.c b/arch/arm/mach-meson/platsmp.c index 2555f9056a339b491cdea8bf9305bf8614690c2b..cad7ee8f0d6b49e10fa2cebf094d754e88524e7d 100644 --- a/arch/arm/mach-meson/platsmp.c +++ b/arch/arm/mach-meson/platsmp.c @@ -102,7 +102,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, scu_base = of_iomap(node, 0); if (!scu_base) { - pr_err("Couln't map SCU registers\n"); + pr_err("Couldn't map SCU registers\n"); return; } diff --git a/arch/arm/mach-omap2/cm_common.c b/arch/arm/mach-omap2/cm_common.c index d555791cf349dd160f49be49e58088e16900466b..83c6fa74cc31e41616f06495bc5b210decd6ac6e 100644 --- a/arch/arm/mach-omap2/cm_common.c +++ b/arch/arm/mach-omap2/cm_common.c @@ -68,14 +68,17 @@ void __init omap2_set_globals_cm(void __iomem *cm, void __iomem *cm2) int cm_split_idlest_reg(struct clk_omap_reg *idlest_reg, s16 *prcm_inst, u8 *idlest_reg_id) { + int ret; if (!cm_ll_data->split_idlest_reg) { WARN_ONCE(1, "cm: %s: no low-level function defined\n", __func__); return -EINVAL; } - return cm_ll_data->split_idlest_reg(idlest_reg, prcm_inst, + ret = cm_ll_data->split_idlest_reg(idlest_reg, prcm_inst, idlest_reg_id); + *prcm_inst -= cm_base.offset; + return ret; } /** @@ -337,6 +340,7 @@ int __init omap2_cm_base_init(void) if (mem) { mem->pa = res.start + data->offset; mem->va = data->mem + data->offset; + mem->offset = data->offset; } data->np = np; diff --git a/arch/arm/mach-omap2/omap-secure.c b/arch/arm/mach-omap2/omap-secure.c index 5ac122e88f678b75d6c1060783738321d8b9c579..fa7f308c90279eb20ef89fdcdaafef31a1f8ec2c 100644 --- a/arch/arm/mach-omap2/omap-secure.c +++ b/arch/arm/mach-omap2/omap-secure.c @@ -73,6 +73,27 @@ phys_addr_t omap_secure_ram_mempool_base(void) return omap_secure_memblock_base; } +#if defined(CONFIG_ARCH_OMAP3) && defined(CONFIG_PM) +u32 omap3_save_secure_ram(void __iomem *addr, int size) +{ + u32 ret; + u32 param[5]; + + if (size != OMAP3_SAVE_SECURE_RAM_SZ) + return OMAP3_SAVE_SECURE_RAM_SZ; + + param[0] = 4; /* Number of arguments */ + param[1] = __pa(addr); /* Physical address for saving */ + param[2] = 0; + param[3] = 1; + param[4] = 1; + + ret = save_secure_ram_context(__pa(param)); + + return ret; +} +#endif + /** * rx51_secure_dispatcher: Routine to dispatch secure PPA API calls * @idx: The PPA API index diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index bae263fba640af8c1ff7a3b1b98e71a35486e94c..c509cde71f931ab959cc751462d924a04fc0778f 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -31,6 +31,8 @@ /* Maximum Secure memory storage size */ #define OMAP_SECURE_RAM_STORAGE (88 * SZ_1K) +#define OMAP3_SAVE_SECURE_RAM_SZ 0x803F + /* Secure low power HAL API index */ #define OMAP4_HAL_SAVESECURERAM_INDEX 0x1a #define OMAP4_HAL_SAVEHW_INDEX 0x1b @@ -65,6 +67,8 @@ extern u32 omap_smc2(u32 id, u32 falg, u32 pargs); extern u32 omap_smc3(u32 id, u32 process, u32 flag, u32 pargs); extern phys_addr_t omap_secure_ram_mempool_base(void); extern int omap_secure_ram_reserve_memblock(void); +extern u32 save_secure_ram_context(u32 args_pa); +extern u32 omap3_save_secure_ram(void __iomem *save_regs, int size); extern u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs, u32 arg1, u32 arg2, u32 arg3, u32 arg4); diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index d45cbfdb4be6838b7f698886a84f4ceb48fe35f8..f0388058b7da399d28d3819ab268d5d0a2a61073 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -391,10 +391,8 @@ omap_device_copy_resources(struct omap_hwmod *oh, const char *name; int error, irq = 0; - if (!oh || !oh->od || !oh->od->pdev) { - error = -EINVAL; - goto error; - } + if (!oh || !oh->od || !oh->od->pdev) + return -EINVAL; np = oh->od->pdev->dev.of_node; if (!np) { @@ -516,8 +514,10 @@ struct platform_device __init *omap_device_build(const char *pdev_name, goto odbs_exit1; od = omap_device_alloc(pdev, &oh, 1); - if (IS_ERR(od)) + if (IS_ERR(od)) { + ret = PTR_ERR(od); goto odbs_exit1; + } ret = platform_device_add_data(pdev, pdata, pdata_len); if (ret) diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index d2106ae4410a23fb76c8593b6d123b3a94800ade..52c9d585b44d2607f4d0a4c5d0b1b1d01cb4bac3 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -1646,6 +1646,7 @@ static struct omap_hwmod omap3xxx_mmc3_hwmod = { .main_clk = "mmchs3_fck", .prcm = { .omap2 = { + .module_offs = CORE_MOD, .prcm_reg_id = 1, .module_bit = OMAP3430_EN_MMC3_SHIFT, .idlest_reg_id = 1, diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h index b668719b9b25a7ada81229ba18ad2645777cf487..8e30772cfe325a35e52f2feaaa53c9fcc59357cd 100644 --- a/arch/arm/mach-omap2/pm.h +++ b/arch/arm/mach-omap2/pm.h @@ -81,10 +81,6 @@ extern unsigned int omap3_do_wfi_sz; /* ... and its pointer from SRAM after copy */ extern void (*omap3_do_wfi_sram)(void); -/* save_secure_ram_context function pointer and size, for copy to SRAM */ -extern int save_secure_ram_context(u32 *addr); -extern unsigned int save_secure_ram_context_sz; - extern void omap3_save_scratchpad_contents(void); #define PM_RTA_ERRATUM_i608 (1 << 0) diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 841ba19d64a69b153a38ef594220d1a4054d7e59..36c55547137cb08b9b1b662cb5477e591590598d 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -48,6 +48,7 @@ #include "prm3xxx.h" #include "pm.h" #include "sdrc.h" +#include "omap-secure.h" #include "sram.h" #include "control.h" #include "vc.h" @@ -66,7 +67,6 @@ struct power_state { static LIST_HEAD(pwrst_list); -static int (*_omap_save_secure_sram)(u32 *addr); void (*omap3_do_wfi_sram)(void); static struct powerdomain *mpu_pwrdm, *neon_pwrdm; @@ -121,8 +121,8 @@ static void omap3_save_secure_ram_context(void) * will hang the system. */ pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_ON); - ret = _omap_save_secure_sram((u32 *)(unsigned long) - __pa(omap3_secure_ram_storage)); + ret = omap3_save_secure_ram(omap3_secure_ram_storage, + OMAP3_SAVE_SECURE_RAM_SZ); pwrdm_set_next_pwrst(mpu_pwrdm, mpu_next_state); /* Following is for error tracking, it should not happen */ if (ret) { @@ -434,15 +434,10 @@ static int __init pwrdms_setup(struct powerdomain *pwrdm, void *unused) * * The minimum set of functions is pushed to SRAM for execution: * - omap3_do_wfi for erratum i581 WA, - * - save_secure_ram_context for security extensions. */ void omap_push_sram_idle(void) { omap3_do_wfi_sram = omap_sram_push(omap3_do_wfi, omap3_do_wfi_sz); - - if (omap_type() != OMAP2_DEVICE_TYPE_GP) - _omap_save_secure_sram = omap_sram_push(save_secure_ram_context, - save_secure_ram_context_sz); } static void __init pm_errata_configure(void) @@ -553,7 +548,7 @@ int __init omap3_pm_init(void) clkdm_add_wkdep(neon_clkdm, mpu_clkdm); if (omap_type() != OMAP2_DEVICE_TYPE_GP) { omap3_secure_ram_storage = - kmalloc(0x803F, GFP_KERNEL); + kmalloc(OMAP3_SAVE_SECURE_RAM_SZ, GFP_KERNEL); if (!omap3_secure_ram_storage) pr_err("Memory allocation failed when allocating for secure sram context\n"); diff --git a/arch/arm/mach-omap2/prcm-common.h b/arch/arm/mach-omap2/prcm-common.h index 0592b23902c6885b03985085ec5c4d7415966d01..0977da0dab76077ee663ffa0dc0ba3f3c32f3a26 100644 --- a/arch/arm/mach-omap2/prcm-common.h +++ b/arch/arm/mach-omap2/prcm-common.h @@ -528,6 +528,7 @@ struct omap_prcm_irq_setup { struct omap_domain_base { u32 pa; void __iomem *va; + s16 offset; }; /** diff --git a/arch/arm/mach-omap2/prm33xx.c b/arch/arm/mach-omap2/prm33xx.c index d2c5bcabdbebe03d9e05e4e551815ce85b9c4207..ebaf80d72a109fdaabb1fbeb21da6d401119ce8d 100644 --- a/arch/arm/mach-omap2/prm33xx.c +++ b/arch/arm/mach-omap2/prm33xx.c @@ -176,17 +176,6 @@ static int am33xx_pwrdm_read_pwrst(struct powerdomain *pwrdm) return v; } -static int am33xx_pwrdm_read_prev_pwrst(struct powerdomain *pwrdm) -{ - u32 v; - - v = am33xx_prm_read_reg(pwrdm->prcm_offs, pwrdm->pwrstst_offs); - v &= AM33XX_LASTPOWERSTATEENTERED_MASK; - v >>= AM33XX_LASTPOWERSTATEENTERED_SHIFT; - - return v; -} - static int am33xx_pwrdm_set_lowpwrstchange(struct powerdomain *pwrdm) { am33xx_prm_rmw_reg_bits(AM33XX_LOWPOWERSTATECHANGE_MASK, @@ -357,7 +346,6 @@ struct pwrdm_ops am33xx_pwrdm_operations = { .pwrdm_set_next_pwrst = am33xx_pwrdm_set_next_pwrst, .pwrdm_read_next_pwrst = am33xx_pwrdm_read_next_pwrst, .pwrdm_read_pwrst = am33xx_pwrdm_read_pwrst, - .pwrdm_read_prev_pwrst = am33xx_pwrdm_read_prev_pwrst, .pwrdm_set_logic_retst = am33xx_pwrdm_set_logic_retst, .pwrdm_read_logic_pwrst = am33xx_pwrdm_read_logic_pwrst, .pwrdm_read_logic_retst = am33xx_pwrdm_read_logic_retst, diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S index fa5fd24f524c5cb233dfb72d73c23929084d7caa..22daf4efed68b2b4b34ea4e5f1c6788f1033c562 100644 --- a/arch/arm/mach-omap2/sleep34xx.S +++ b/arch/arm/mach-omap2/sleep34xx.S @@ -93,20 +93,13 @@ ENTRY(enable_omap3630_toggle_l2_on_restore) ENDPROC(enable_omap3630_toggle_l2_on_restore) /* - * Function to call rom code to save secure ram context. This gets - * relocated to SRAM, so it can be all in .data section. Otherwise - * we need to initialize api_params separately. + * Function to call rom code to save secure ram context. + * + * r0 = physical address of the parameters */ - .data - .align 3 ENTRY(save_secure_ram_context) stmfd sp!, {r4 - r11, lr} @ save registers on stack - adr r3, api_params @ r3 points to parameters - str r0, [r3,#0x4] @ r0 has sdram address - ldr r12, high_mask - and r3, r3, r12 - ldr r12, sram_phy_addr_mask - orr r3, r3, r12 + mov r3, r0 @ physical address of parameters mov r0, #25 @ set service ID for PPA mov r12, r0 @ copy secure service ID in r12 mov r1, #0 @ set task id for ROM code in r1 @@ -120,18 +113,7 @@ ENTRY(save_secure_ram_context) nop nop ldmfd sp!, {r4 - r11, pc} - .align -sram_phy_addr_mask: - .word SRAM_BASE_P -high_mask: - .word 0xffff -api_params: - .word 0x4, 0x0, 0x0, 0x1, 0x1 ENDPROC(save_secure_ram_context) -ENTRY(save_secure_ram_context_sz) - .word . - save_secure_ram_context - - .text /* * ====================== diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a93339f5178f2eff247144eb9244c077225094bc..c9a7e9e1414f344c9dfd515600e3e4378bf61d81 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -557,7 +557,6 @@ config QCOM_QDF2400_ERRATUM_0065 If unsure, say Y. - config SOCIONEXT_SYNQUACER_PREITS bool "Socionext Synquacer: Workaround for GICv3 pre-ITS" default y @@ -576,6 +575,17 @@ config HISILICON_ERRATUM_161600802 a 128kB offset to be applied to the target address in this commands. If unsure, say Y. + +config QCOM_FALKOR_ERRATUM_E1041 + bool "Falkor E1041: Speculative instruction fetches might cause errant memory access" + default y + help + Falkor CPU may speculatively fetch instructions from an improper + memory location when MMU translation is changed from SCTLR_ELn[M]=1 + to SCTLR_ELn[M]=0. Prefix an ISB instruction to fix the problem. + + If unsure, say Y. + endmenu diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile index d7c22d51bc5073c13b17143d314e49728bcd3220..4aa50b9b26bca7b904ab3e2921dc844f0f46f9b9 100644 --- a/arch/arm64/boot/dts/Makefile +++ b/arch/arm64/boot/dts/Makefile @@ -12,6 +12,7 @@ subdir-y += cavium subdir-y += exynos subdir-y += freescale subdir-y += hisilicon +subdir-y += lg subdir-y += marvell subdir-y += mediatek subdir-y += nvidia @@ -22,5 +23,4 @@ subdir-y += rockchip subdir-y += socionext subdir-y += sprd subdir-y += xilinx -subdir-y += lg subdir-y += zte diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 45bdbfb961261becf2fb940fb9da19ee0f99560a..4a8d3f83a36eabc50134577306c06febffb9bed8 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -75,6 +75,7 @@ pinctrl-0 = <&rgmii_pins>; phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; + phy-supply = <®_dc1sw>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts index 806442d3e846881d01e3c971da41af1db869b14f..604cdaedac38eed479a8f24a88efbc4fddc2676e 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts @@ -77,6 +77,7 @@ pinctrl-0 = <&rmii_pins>; phy-mode = "rmii"; phy-handle = <&ext_rmii_phy1>; + phy-supply = <®_dc1sw>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index 0eb2acedf8c3bc5ff2b4bbde4cddfde4d8c204ad..abe179de35d780cc1d3691394d465817dad30549 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -82,6 +82,7 @@ pinctrl-0 = <&rgmii_pins>; phy-mode = "rgmii"; phy-handle = <&ext_rgmii_phy>; + phy-supply = <®_dc1sw>; status = "okay"; }; @@ -95,7 +96,7 @@ &mmc2 { pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; - vmmc-supply = <®_vcc3v3>; + vmmc-supply = <®_dcdc1>; vqmmc-supply = <®_vcc1v8>; bus-width = <8>; non-removable; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi index a5da18a6f2866d34537c55dcc5d3288daf6c8331..43418bd881d81e73da7634fa6ff62c11d64b65ef 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi @@ -45,19 +45,10 @@ #include "sun50i-a64.dtsi" -/ { - reg_vcc3v3: vcc3v3 { - compatible = "regulator-fixed"; - regulator-name = "vcc3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - }; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; - vmmc-supply = <®_vcc3v3>; + vmmc-supply = <®_dcdc1>; non-removable; disable-wp; bus-width = <4>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts index b6b7a561df8c91d90c57bd2d36cbaba37283bb02..a42fd79a62a30695221375b9a18d0209c29d73dd 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts @@ -71,7 +71,7 @@ pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>; vmmc-supply = <®_vcc3v3>; bus-width = <4>; - cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>; + cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index ead895a4e9a5c9fb6f663092288e8178b95cd6a1..1fb8b9d6cb4ea07c105d088d2ccc5c25ca1ea128 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -753,12 +753,12 @@ &uart_B { clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>; - clock-names = "xtal", "core", "baud"; + clock-names = "xtal", "pclk", "baud"; }; &uart_C { clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>; - clock-names = "xtal", "core", "baud"; + clock-names = "xtal", "pclk", "baud"; }; &vpu { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 8ed981f59e5ae5804da97c887193a32a73b53983..6524b89e7115b5e313834b5ab8565768eb06bd13 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -688,7 +688,7 @@ &uart_A { clocks = <&xtal>, <&clkc CLKID_UART0>, <&xtal>; - clock-names = "xtal", "core", "baud"; + clock-names = "xtal", "pclk", "baud"; }; &uart_AO { @@ -703,12 +703,12 @@ &uart_B { clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>; - clock-names = "xtal", "core", "baud"; + clock-names = "xtal", "pclk", "baud"; }; &uart_C { clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>; - clock-names = "xtal", "core", "baud"; + clock-names = "xtal", "pclk", "baud"; }; &vpu { diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index a298df74ca6c037f373f78f5fd310a4d8e8c0e64..dbe2648649db1e1f74bfb7b2bc97f9765658f7d9 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -255,7 +255,6 @@ &avb { pinctrl-0 = <&avb_pins>; pinctrl-names = "default"; - renesas,no-ether-link; phy-handle = <&phy0>; status = "okay"; diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi index 0d85b315ce711c8f0d896d7861204036ce9b0ec3..73439cf4865964016c14d2237c2f504d3ddfa430 100644 --- a/arch/arm64/boot/dts/renesas/ulcb.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi @@ -145,7 +145,6 @@ &avb { pinctrl-0 = <&avb_pins>; pinctrl-names = "default"; - renesas,no-ether-link; phy-handle = <&phy0>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index d4f80786e7c20c0a46bacb9636f8defc7ef137b4..3890468678ce1caa78a411aeb0a4a9cdedfc1302 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -132,6 +132,8 @@ assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>; assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>; clock_in_out = "input"; + /* shows instability at 1GBit right now */ + max-speed = <100>; phy-supply = <&vcc_io>; phy-mode = "rgmii"; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 41d61840fb99ce52ec553c94e119ab63bb79cdbe..2426da6319382dbcab52f286860aabd8651087cd 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -514,7 +514,7 @@ tsadc: tsadc@ff250000 { compatible = "rockchip,rk3328-tsadc"; reg = <0x0 0xff250000 0x0 0x100>; - interrupts = ; + interrupts = ; assigned-clocks = <&cru SCLK_TSADC>; assigned-clock-rates = <50000>; clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 910628d18add07d9a39974bc6ce2ac4a403adb81..1fc5060d7027e6e4b852c6cd71d430d2e7ab3990 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -155,17 +155,6 @@ regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; }; - - vdd_log: vdd-log { - compatible = "pwm-regulator"; - pwms = <&pwm2 0 25000 0>; - regulator-name = "vdd_log"; - regulator-min-microvolt = <800000>; - regulator-max-microvolt = <1400000>; - regulator-always-on; - regulator-boot-on; - status = "okay"; - }; }; &cpu_b0 { diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts index dd7193acc7dfa54aebf712fe7bbb3264159565f0..6bdefb26b3296ae2dda562472bd26a8a8d8b23bd 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts @@ -40,7 +40,6 @@ }; ðsc { - interrupt-parent = <&gpio>; interrupts = <0 8>; }; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts index d99e3731358c4aed8f7933fb49a8ba4db7a290b0..254d6795c67e94802789038ad3b9ccc872ffb3f5 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts @@ -40,7 +40,6 @@ }; ðsc { - interrupt-parent = <&gpio>; interrupts = <0 8>; }; diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts index 864feeb3518014f2f9670e3f82c5815c624d038a..f9f06fcfb94aa5e2c8552afc11ae2df8bf3c2f10 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts @@ -38,8 +38,7 @@ }; ðsc { - interrupt-parent = <&gpio>; - interrupts = <0 8>; + interrupts = <4 8>; }; &serial0 { diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi index 48e733136db4580ffd06963f622b7ce7ada92d61..0ac2ace824350185c0e53369b6702875f1274ea0 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi @@ -198,8 +198,8 @@ gpio-controller; #gpio-cells = <2>; gpio-ranges = <&pinctrl 0 0 0>, - <&pinctrl 96 0 0>, - <&pinctrl 160 0 0>; + <&pinctrl 104 0 0>, + <&pinctrl 168 0 0>; gpio-ranges-group-names = "gpio_range0", "gpio_range1", "gpio_range2"; diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index aef72d886677758c76d6b932c863893df7c67b53..8b168280976f25de43539ed1b4dbed9b952fcfde 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -512,4 +512,14 @@ alternative_else_nop_endif #endif .endm +/** + * Errata workaround prior to disable MMU. Insert an ISB immediately prior + * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0. + */ + .macro pre_disable_mmu_workaround +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041 + isb +#endif + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index ac67cfc2585a8af417405958779b792d60b06e6a..060e3a4008abd18e4a5aa48bb5b6fa1674a735c3 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -60,6 +60,9 @@ enum ftr_type { #define FTR_VISIBLE true /* Feature visible to the user space */ #define FTR_HIDDEN false /* Feature is hidden from the user */ +#define FTR_VISIBLE_IF_IS_ENABLED(config) \ + (IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN) + struct arm64_ftr_bits { bool sign; /* Value is signed ? */ bool visible; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 235e77d982610a0114f62cc833278994cc2e76b2..cbf08d7cbf3089949bb4ada755a36383e6e5db2e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -91,6 +91,7 @@ #define BRCM_CPU_PART_VULCAN 0x516 #define QCOM_CPU_PART_FALKOR_V1 0x800 +#define QCOM_CPU_PART_FALKOR 0xC00 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) @@ -99,6 +100,7 @@ #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) +#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 650344d01124971d98819116b7e657c7fce7c6f4..c4cd5081d78bc4b66e34f3c283d9b1f6ba48518c 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -132,11 +132,9 @@ static inline void efi_set_pgd(struct mm_struct *mm) * Defer the switch to the current thread's TTBR0_EL1 * until uaccess_enable(). Restore the current * thread's saved ttbr0 corresponding to its active_mm - * (if different from init_mm). */ cpu_set_reserved_ttbr0(); - if (current->active_mm != &init_mm) - update_saved_ttbr0(current, current->active_mm); + update_saved_ttbr0(current, current->active_mm); } } } diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 7f069ff37f06cff3a4e032bc5916be1b155ee1f8..715d395ef45bb23f072b34f88116b36940346e95 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -170,8 +170,7 @@ #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) -#define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X) #define VTTBR_VMID_SHIFT (UL(48)) #define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 674912d7a571942b956c980aa2096efff32e85f9..ea6cb5b24258be29f39507c39d526ea7e52c7c30 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -370,6 +370,7 @@ void kvm_arm_init_debug(void); void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 3257895a9b5e413c7c69c9d3cdb2fa23ec030592..9d155fa9a50791af293916cfdc1ede087f850c6d 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -156,29 +156,21 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); #define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) -/* - * This is called when "tsk" is about to enter lazy TLB mode. - * - * mm: describes the currently active mm context - * tsk: task which is entering lazy tlb - * cpu: cpu number which is entering lazy tlb - * - * tsk->mm will be NULL - */ -static inline void -enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ -} - #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void update_saved_ttbr0(struct task_struct *tsk, struct mm_struct *mm) { - if (system_uses_ttbr0_pan()) { - BUG_ON(mm->pgd == swapper_pg_dir); - task_thread_info(tsk)->ttbr0 = - virt_to_phys(mm->pgd) | ASID(mm) << 48; - } + u64 ttbr; + + if (!system_uses_ttbr0_pan()) + return; + + if (mm == &init_mm) + ttbr = __pa_symbol(empty_zero_page); + else + ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48; + + task_thread_info(tsk)->ttbr0 = ttbr; } #else static inline void update_saved_ttbr0(struct task_struct *tsk, @@ -187,6 +179,16 @@ static inline void update_saved_ttbr0(struct task_struct *tsk, } #endif +static inline void +enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ + /* + * We don't actually care about the ttbr0 mapping, so point it at the + * zero page. + */ + update_saved_ttbr0(tsk, &init_mm); +} + static inline void __switch_mm(struct mm_struct *next) { unsigned int cpu = smp_processor_id(); @@ -214,11 +216,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, * Update the saved TTBR0_EL1 of the scheduled-in task as the previous * value may have not been initialised yet (activate_mm caller) or the * ASID has changed since the last run (following the context switch - * of another thread of the same process). Avoid setting the reserved - * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit). + * of another thread of the same process). */ - if (next != &init_mm) - update_saved_ttbr0(tsk, next); + update_saved_ttbr0(tsk, next); } #define deactivate_mm(tsk,mm) do { } while (0) diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 8d5cbec17d803e37556b5f4a7b25e7b4f1391b35..f9ccc36d3dc3cb2e29ad2cd47f7cdd5eed4cef8a 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -18,6 +18,7 @@ #define __ASM_PERF_EVENT_H #include +#include #define ARMV8_PMU_MAX_COUNTERS 32 #define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) @@ -79,6 +80,7 @@ struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +#define perf_arch_bpf_user_pt_regs(regs) ®s->user_regs #endif #define perf_arch_fetch_caller_regs(regs, __ip) { \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 149d05fb9421520bd659b62627941ed36ce46bb3..bdcc7f1c9d069df3d95c6884b8071cdba530cfc8 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -42,6 +42,8 @@ #include #include #include +#include +#include extern void __pte_error(const char *file, int line, unsigned long val); extern void __pmd_error(const char *file, int line, unsigned long val); @@ -149,12 +151,20 @@ static inline pte_t pte_mkwrite(pte_t pte) static inline pte_t pte_mkclean(pte_t pte) { - return clear_pte_bit(pte, __pgprot(PTE_DIRTY)); + pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY)); + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + + return pte; } static inline pte_t pte_mkdirty(pte_t pte) { - return set_pte_bit(pte, __pgprot(PTE_DIRTY)); + pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); + + if (pte_write(pte)) + pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + + return pte; } static inline pte_t pte_mkold(pte_t pte) @@ -207,9 +217,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte) } } -struct mm_struct; -struct vm_area_struct; - extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); /* @@ -238,7 +245,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, * hardware updates of the pte (ptep_set_access_flags safely changes * valid ptes without going through an invalid entry). */ - if (pte_valid(*ptep) && pte_valid(pte)) { + if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) && + (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) { VM_WARN_ONCE(!pte_young(pte), "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", __func__, pte_val(*ptep), pte_val(pte)); @@ -641,28 +649,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* - * ptep_set_wrprotect - mark read-only while preserving the hardware update of - * the Access Flag. + * ptep_set_wrprotect - mark read-only while trasferring potential hardware + * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit. */ #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) { pte_t old_pte, pte; - /* - * ptep_set_wrprotect() is only called on CoW mappings which are - * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE && - * PTE_RDONLY) or writable and software-dirty (PTE_WRITE && - * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and - * protection_map[]. There is no race with the hardware update of the - * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM) - * is set. - */ - VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep), - "%s: potential race with hardware DBM", __func__); pte = READ_ONCE(*ptep); do { old_pte = pte; + /* + * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY + * clear), set the PTE_DIRTY bit. + */ + if (pte_hw_dirty(pte)) + pte = pte_mkdirty(pte); pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); diff --git a/arch/arm64/include/uapi/asm/bpf_perf_event.h b/arch/arm64/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 0000000000000000000000000000000000000000..b551b741653d251d6155a214023b2215ae02e871 --- /dev/null +++ b/arch/arm64/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 65f42d2574142d4b37bebf49ed1fc3cdccbb56ae..2a752cb2a0f35a82f2a60e744d160af9b5f6c6a1 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart) mrs x12, sctlr_el1 ldr x13, =SCTLR_ELx_FLAGS bic x12, x12, x13 + pre_disable_mmu_workaround msr sctlr_el1, x12 isb diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c5ba0097887f93e9d30b37355b84e5750d74d04e..a73a5928f09b26ae7b2de7b3c2217e5c975de4a0 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -145,7 +145,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 4e6ad355bd058e6a4ab73a0f94832a7b1fe719a6..6b9736c3fb5630ab31c17b662b5c5cfe2b7d0832 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -96,6 +96,7 @@ ENTRY(entry) mrs x0, sctlr_el2 bic x0, x0, #1 << 0 // clear SCTLR.M bic x0, x0, #1 << 2 // clear SCTLR.C + pre_disable_mmu_workaround msr sctlr_el2, x0 isb b 2f @@ -103,6 +104,7 @@ ENTRY(entry) mrs x0, sctlr_el1 bic x0, x0, #1 << 0 // clear SCTLR.M bic x0, x0, #1 << 2 // clear SCTLR.C + pre_disable_mmu_workaround msr sctlr_el1, x0 isb 2: diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5084e699447a4d011742ad964448203c1d93337a..fae81f7964b4f226242961607cb087a20710e22b 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -114,7 +114,12 @@ * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so * whatever is in the FPSIMD registers is not saved to memory, but discarded. */ -static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); +struct fpsimd_last_state_struct { + struct fpsimd_state *st; + bool sve_in_use; +}; + +static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); /* Default VL for tasks that don't set it explicitly: */ static int sve_default_vl = -1; @@ -905,7 +910,7 @@ void fpsimd_thread_switch(struct task_struct *next) */ struct fpsimd_state *st = &next->thread.fpsimd_state; - if (__this_cpu_read(fpsimd_last_state) == st + if (__this_cpu_read(fpsimd_last_state.st) == st && st->cpu == smp_processor_id()) clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE); else @@ -991,6 +996,21 @@ void fpsimd_signal_preserve_current_state(void) sve_to_fpsimd(current); } +/* + * Associate current's FPSIMD context with this cpu + * Preemption must be disabled when calling this function. + */ +static void fpsimd_bind_to_cpu(void) +{ + struct fpsimd_last_state_struct *last = + this_cpu_ptr(&fpsimd_last_state); + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + last->st = st; + last->sve_in_use = test_thread_flag(TIF_SVE); + st->cpu = smp_processor_id(); +} + /* * Load the userland FPSIMD state of 'current' from memory, but only if the * FPSIMD state already held in the registers is /not/ the most recent FPSIMD @@ -1004,11 +1024,8 @@ void fpsimd_restore_current_state(void) local_bh_disable(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { - struct fpsimd_state *st = ¤t->thread.fpsimd_state; - task_fpsimd_load(); - __this_cpu_write(fpsimd_last_state, st); - st->cpu = smp_processor_id(); + fpsimd_bind_to_cpu(); } local_bh_enable(); @@ -1026,18 +1043,14 @@ void fpsimd_update_current_state(struct fpsimd_state *state) local_bh_disable(); - current->thread.fpsimd_state = *state; + current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd; if (system_supports_sve() && test_thread_flag(TIF_SVE)) fpsimd_to_sve(current); task_fpsimd_load(); - if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { - struct fpsimd_state *st = ¤t->thread.fpsimd_state; - - __this_cpu_write(fpsimd_last_state, st); - st->cpu = smp_processor_id(); - } + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_bind_to_cpu(); local_bh_enable(); } @@ -1052,7 +1065,7 @@ void fpsimd_flush_task_state(struct task_struct *t) static inline void fpsimd_flush_cpu_state(void) { - __this_cpu_write(fpsimd_last_state, NULL); + __this_cpu_write(fpsimd_last_state.st, NULL); } /* @@ -1065,14 +1078,10 @@ static inline void fpsimd_flush_cpu_state(void) #ifdef CONFIG_ARM64_SVE void sve_flush_cpu_state(void) { - struct fpsimd_state *const fpstate = __this_cpu_read(fpsimd_last_state); - struct task_struct *tsk; - - if (!fpstate) - return; + struct fpsimd_last_state_struct const *last = + this_cpu_ptr(&fpsimd_last_state); - tsk = container_of(fpstate, struct task_struct, thread.fpsimd_state); - if (test_tsk_thread_flag(tsk, TIF_SVE)) + if (last->st && last->sve_in_use) fpsimd_flush_cpu_state(); } #endif /* CONFIG_ARM64_SVE */ @@ -1267,7 +1276,7 @@ static inline void fpsimd_pm_init(void) { } #ifdef CONFIG_HOTPLUG_CPU static int fpsimd_cpu_dead(unsigned int cpu) { - per_cpu(fpsimd_last_state, cpu) = NULL; + per_cpu(fpsimd_last_state.st, cpu) = NULL; return 0; } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 67e86a0f57ac43edcee10d89bd5db2e050ae1621..e3cb9fbf96b66c3ba2d4327d4c1a4b3ca734ef1f 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -750,6 +750,7 @@ __primary_switch: * to take into account by discarding the current kernel mapping and * creating a new one. */ + pre_disable_mmu_workaround msr sctlr_el1, x20 // disable the MMU isb bl __create_page_tables // recreate kernel mapping diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 749f81779420c7bab2ead0d8f5b9a6cf108e6a45..74bb56f656eff024839df19897ba06512128e9bb 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -36,7 +37,6 @@ #include #include #include -#include /* Breakpoint currently in use for each BRP. */ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index b2adcce7bc18e628c924d009ba38190cb1bff951..6b7dcf4310acf6fc04ff40cca4ccd10e27fabec9 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -314,6 +314,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, clear_tsk_thread_flag(p, TIF_SVE); p->thread.sve_state = NULL; + /* + * In case p was allocated the same task_struct pointer as some + * other recently-exited task, make sure p is disassociated from + * any cpu that may have run that now-exited task recently. + * Otherwise we could erroneously skip reloading the FPSIMD + * registers for p. + */ + fpsimd_flush_task_state(p); + if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); childregs->regs[0] = 0; diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index ce704a4aeadd438bf637472bb7037b89fba15087..f407e422a7200b86072349cc70e1e6d5e7e1753b 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel) mrs x0, sctlr_el2 ldr x1, =SCTLR_ELx_FLAGS bic x0, x0, x1 + pre_disable_mmu_workaround msr sctlr_el2, x0 isb 1: diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index dbadfaf850a7d07e202684f293c072e5ed4ec722..fa63b28c65e08a37e269b8d55fa6197e77b42b40 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -221,3 +221,24 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) } } } + + +/* + * After successfully emulating an instruction, we might want to + * return to user space with a KVM_EXIT_DEBUG. We can only do this + * once the emulation is complete, though, so for userspace emulations + * we have to wait until we have re-entered KVM before calling this + * helper. + * + * Return true (and set exit_reason) to return to userspace or false + * if no further action is required. + */ +bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT; + return true; + } + return false; +} diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index b712479954692f3efd3844e490b412f71c3a96aa..304203fa9e3307583748de10c6c1f7425a231c7b 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -28,6 +28,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace.h" @@ -186,6 +187,40 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) return arm_exit_handlers[hsr_ec]; } +/* + * We may be single-stepping an emulated instruction. If the emulation + * has been completed in the kernel, we can return to userspace with a + * KVM_EXIT_DEBUG, otherwise userspace needs to complete its + * emulation first. + */ +static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int handled; + + /* + * See ARM ARM B1.14.1: "Hyp traps on instructions + * that fail their condition code check" + */ + if (!kvm_condition_valid(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + handled = 1; + } else { + exit_handle_fn exit_handler; + + exit_handler = kvm_get_exit_handler(vcpu); + handled = exit_handler(vcpu, run); + } + + /* + * kvm_arm_handle_step_debug() sets the exit_reason on the kvm_run + * structure if we need to return to userspace. + */ + if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run)) + handled = 0; + + return handled; +} + /* * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on * proper exit to userspace. @@ -193,8 +228,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index) { - exit_handle_fn exit_handler; - if (ARM_SERROR_PENDING(exception_index)) { u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); @@ -220,20 +253,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, return 1; case ARM_EXCEPTION_EL1_SERROR: kvm_inject_vabt(vcpu); - return 1; - case ARM_EXCEPTION_TRAP: - /* - * See ARM ARM B1.14.1: "Hyp traps on instructions - * that fail their condition code check" - */ - if (!kvm_condition_valid(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + /* We may still need to return for single-step */ + if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS) + && kvm_arm_handle_step_debug(vcpu, run)) + return 0; + else return 1; - } - - exit_handler = kvm_get_exit_handler(vcpu); - - return exit_handler(vcpu, run); + case ARM_EXCEPTION_TRAP: + return handle_trap_exceptions(vcpu, run); case ARM_EXCEPTION_HYP_GONE: /* * EL2 has been reset to the hyp-stub. This happens when a guest diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 3f9615582377661a88fab8be6a12365d625d830a..870828c364c508f825eacc1c49c17886dc9c8cb2 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -151,6 +151,7 @@ reset: mrs x5, sctlr_el2 ldr x6, =SCTLR_ELx_FLAGS bic x5, x5, x6 // Clear SCTL_M and etc + pre_disable_mmu_workaround msr sctlr_el2, x5 isb diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 321c9c05dd9e09fc0c745a4543a286b7628f00a4..f4363d40e2cd7fd62d40d826d5296c95f15cde9f 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) { u64 reg; + /* Clear pmscr in case of early return */ + *pmscr_el1 = 0; + /* SPE present on this CPU? */ if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), ID_AA64DFR0_PMSVER_SHIFT)) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 525c01f48867808b6efa257063daaa4c8207252e..f7c651f3a8c0e8001bb11b1a45216ac6d4a6b342 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -22,6 +22,7 @@ #include #include #include +#include static bool __hyp_text __fpsimd_enabled_nvhe(void) { @@ -269,7 +270,11 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) return true; } -static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu) +/* Skip an instruction which has been emulated. Returns true if + * execution can continue or false if we need to exit hyp mode because + * single-step was in effect. + */ +static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu) { *vcpu_pc(vcpu) = read_sysreg_el2(elr); @@ -282,6 +287,14 @@ static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu) } write_sysreg_el2(*vcpu_pc(vcpu), elr); + + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + vcpu->arch.fault.esr_el2 = + (ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT) | 0x22; + return false; + } else { + return true; + } } int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) @@ -342,13 +355,21 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) int ret = __vgic_v2_perform_cpuif_access(vcpu); if (ret == 1) { - __skip_instr(vcpu); - goto again; + if (__skip_instr(vcpu)) + goto again; + else + exit_code = ARM_EXCEPTION_TRAP; } if (ret == -1) { - /* Promote an illegal access to an SError */ - __skip_instr(vcpu); + /* Promote an illegal access to an + * SError. If we would be returning + * due to single-step clear the SS + * bit so handle_exit knows what to + * do after dealing with the error. + */ + if (!__skip_instr(vcpu)) + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; exit_code = ARM_EXCEPTION_EL1_SERROR; } @@ -363,8 +384,10 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) int ret = __vgic_v3_perform_cpuif_access(vcpu); if (ret == 1) { - __skip_instr(vcpu); - goto again; + if (__skip_instr(vcpu)) + goto again; + else + exit_code = ARM_EXCEPTION_TRAP; } /* 0 falls through to be handled out of EL2 */ diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index ca74a2aace425b95ed95ecf0e70a78188621004e..7b60d62ac5939e83c8e153ec1c3a0447565f23eb 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -389,7 +389,7 @@ void ptdump_check_wx(void) .check_wx = true, }; - walk_pgd(&st, &init_mm, 0); + walk_pgd(&st, &init_mm, VA_START); note_page(&st, 0, 0, 0); if (st.wx_pages || st.uxn_pages) pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 22168cd0dde73e06698bc40b166867df17a00134..9b7f89df49dbfe108da2eadc59421ce99a4432b4 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -574,7 +574,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct siginfo info; const struct fault_info *inf; - int ret = 0; inf = esr_to_fault_info(esr); pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", @@ -589,7 +588,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) if (interrupts_enabled(regs)) nmi_enter(); - ret = ghes_notify_sea(); + ghes_notify_sea(); if (interrupts_enabled(regs)) nmi_exit(); @@ -604,7 +603,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) info.si_addr = (void __user *)addr; arm64_notify_die("", regs, &info, esr); - return ret; + return 0; } static const struct fault_info fault_info[] = { diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 5960bef0170df85916d0c1ac3b65f570f0af67ea..00e7b900ca4193e83dfa7de7dd506984afe90bce 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -476,6 +476,8 @@ void __init arm64_memblock_init(void) reserve_elfcorehdr(); + high_memory = __va(memblock_end_of_DRAM() - 1) + 1; + dma_contiguous_reserve(arm64_dma_phys_limit); memblock_allow_resize(); @@ -502,7 +504,6 @@ void __init bootmem_init(void) sparse_init(); zone_sizes_init(min, max); - high_memory = __va((max << PAGE_SHIFT) - 1) + 1; memblock_dump_all(); } diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild index aa624b4ab6557c3e22d3660819f25688d605e3ed..2240b38c2915fa725cf2c5d1afc322edd1bb47c0 100644 --- a/arch/blackfin/include/uapi/asm/Kbuild +++ b/arch/blackfin/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += ioctl.h generic-y += ipcbuf.h diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild index 67ee896a76a7f2f0837436e054b8146dbe7dbac3..26644e15d8540fa43cf70e47acf9ce837dd18fd5 100644 --- a/arch/c6x/include/uapi/asm/Kbuild +++ b/arch/c6x/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild index 3687b54bb18ed1a0f36d512af627b8085f15987b..3470c6e9c7b9ba1ca3b5962d276b42a4c2d2e35f 100644 --- a/arch/cris/include/uapi/asm/Kbuild +++ b/arch/cris/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/frv/include/uapi/asm/Kbuild b/arch/frv/include/uapi/asm/Kbuild index b15bf6bc0e94f46f035e8781ffa921060341fe91..14a2e9af97e9992d87821e8f11276ecfef8e57cf 100644 --- a/arch/frv/include/uapi/asm/Kbuild +++ b/arch/frv/include/uapi/asm/Kbuild @@ -1,2 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm + +generic-y += bpf_perf_event.h diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild index 187aed820e71feac3ffd03e021387bc892bda5e9..2f65f78792cbe5cf7219bb2228fb84e05a0b9204 100644 --- a/arch/h8300/include/uapi/asm/Kbuild +++ b/arch/h8300/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild index cb5df3aad3a848e27fdccfd2c36e848b1217bc27..41a176dbb53e4f16524157bae122180d572dd4f5 100644 --- a/arch/hexagon/include/uapi/asm/Kbuild +++ b/arch/hexagon/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild index 13a97aa2285f7418d18f1396f03bf6281b580a0f..f5c6967a93bb204ff41bfdee837d3ec4bdaa94dd 100644 --- a/arch/ia64/include/uapi/asm/Kbuild +++ b/arch/ia64/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += kvm_para.h diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index c6ecb97151a25774bf75ce5a1663210db4f6bce3..9025699049ca63b3834ed917d935224009d2d7a8 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -88,7 +88,7 @@ void vtime_flush(struct task_struct *tsk) } if (ti->softirq_time) { - delta = cycle_to_nsec(ti->softirq_time)); + delta = cycle_to_nsec(ti->softirq_time); account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ); } diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild index 1c44d3b3eba03bac62b9a69e6e560aac61a0a3ff..451bf6071c6e28036f0da81dedb35c8b184f3b5a 100644 --- a/arch/m32r/include/uapi/asm/Kbuild +++ b/arch/m32r/include/uapi/asm/Kbuild @@ -1,5 +1,6 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += kvm_para.h generic-y += siginfo.h diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index cb79fba79d4391dbe07cd517309f77cbc5b05506..b88a8dd149333d0a0227c28762ca904fbe268ed2 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -122,7 +122,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig index 55e55dbc2fb66410f757f635a7cc46a7c28b1287..3d07b1de7eb0aa807d9aa637b3b0cec07e4ca1e2 100644 --- a/arch/m68k/configs/stmark2_defconfig +++ b/arch/m68k/configs/stmark2_defconfig @@ -5,7 +5,6 @@ CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_NAMESPACES=y CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="../uClinux-dist/romfs" # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set # CONFIG_RD_XZ is not set diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild index 3717b64a620df54a46495f07a3597188b9f727ec..c2e26a44c482da3a6d87d9b26173d64e7c9ca78f 100644 --- a/arch/m68k/include/uapi/asm/Kbuild +++ b/arch/m68k/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += ioctl.h generic-y += ipcbuf.h diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds index 3aa571a513b5dfa1f48bf2e17c120289f1a0d246..cf6edda389719535e25d505dfdc31d885995cffb 100644 --- a/arch/m68k/kernel/vmlinux-nommu.lds +++ b/arch/m68k/kernel/vmlinux-nommu.lds @@ -45,6 +45,8 @@ SECTIONS { .text : { HEAD_TEXT TEXT_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 89172b8974b95444f4def01ce4104b664391eaea..625a5785804faf8d706442150e6cdcac0ee117c3 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -16,6 +16,8 @@ SECTIONS .text : { HEAD_TEXT TEXT_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index 293990efc9173288d38313e22c81e2c93aea0909..9868270b0984487c5a83100514cd88e8ddd743cd 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -16,6 +16,8 @@ SECTIONS .text : { HEAD_TEXT TEXT_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild index 6ac763d9a3e34e3cfb0caaf0072f78c26ec8b03f..f9eaf07d29f84ab1871d49b89c11a107fb5e1149 100644 --- a/arch/metag/include/uapi/asm/Kbuild +++ b/arch/metag/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild index 06609ca361150ab77529bf0999d8e258ad25d62c..2c6a6bffea3265d3f3ef1b4d04f6c64347e395e4 100644 --- a/arch/microblaze/include/uapi/asm/Kbuild +++ b/arch/microblaze/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 7c8aab23bce8da59f727ff8725250a8f0d385b12..b1f66699677dbaa31931e84f702c88ad0d2feb8f 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -16,7 +16,6 @@ generic-y += qrwlock.h generic-y += qspinlock.h generic-y += sections.h generic-y += segment.h -generic-y += serial.h generic-y += trace_clock.h generic-y += unaligned.h generic-y += user.h diff --git a/arch/mips/include/asm/serial.h b/arch/mips/include/asm/serial.h new file mode 100644 index 0000000000000000000000000000000000000000..1d830c6666c2731d008388d3b1e44951a94fa639 --- /dev/null +++ b/arch/mips/include/asm/serial.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2017 MIPS Tech, LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#ifndef __ASM__SERIAL_H +#define __ASM__SERIAL_H + +#ifdef CONFIG_MIPS_GENERIC +/* + * Generic kernels cannot know a correct value for all platforms at + * compile time. Set it to 0 to prevent 8250_early using it + */ +#define BASE_BAUD 0 +#else +#include +#endif + +#endif /* __ASM__SERIAL_H */ diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild index a0266feba9e6d996d5469ed18fd23df081a2ab38..7a4becd8963a219331632203849998a0a3f56e03 100644 --- a/arch/mips/include/uapi/asm/Kbuild +++ b/arch/mips/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += ipcbuf.h diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index c7ed26029cbbcf0ed7458a145d490957a59abb4e..e68e6e04063a7e0d9f30c50ebac3b082ac7b8b88 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -235,6 +235,7 @@ LEAF(mips_cps_core_init) has_mt t0, 3f .set push + .set MIPS_ISA_LEVEL_RAW .set mt /* Only allow 1 TC per VPE to execute... */ @@ -388,6 +389,7 @@ LEAF(mips_cps_boot_vpes) #elif defined(CONFIG_MIPS_MT) .set push + .set MIPS_ISA_LEVEL_RAW .set mt /* If the core doesn't support MT then return */ diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 45d0b6b037eeb6a985318df16df6f73a7f1dad9c..57028d49c202aeab53a1b4ccd5d21b3b3052cedb 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -705,6 +705,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) struct task_struct *t; int max_users; + /* If nothing to change, return right away, successfully. */ + if (value == mips_get_process_fp_mode(task)) + return 0; + + /* Only accept a mode change if 64-bit FP enabled for o32. */ + if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT)) + return -EOPNOTSUPP; + + /* And only for o32 tasks. */ + if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS)) + return -EOPNOTSUPP; + /* Check the value is valid */ if (value & ~known_bits) return -EOPNOTSUPP; diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index efbd8df8b6652e7a81baed64134858fd8b3d733a..0b23b1ad99e65f1e21d1810340f9dd306483b8d3 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -419,63 +419,160 @@ static int gpr64_set(struct task_struct *target, #endif /* CONFIG_64BIT */ +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer, + * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots + * correspond 1:1 to buffer slots. Only general registers are copied. + */ +static int fpr_get_fpa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + void **kbuf, void __user **ubuf) +{ + return user_regset_copyout(pos, count, kbuf, ubuf, + &target->thread.fpu, + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); +} + +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer, + * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's + * general register slots are copied to buffer slots. Only general + * registers are copied. + */ +static int fpr_get_msa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + void **kbuf, void __user **ubuf) +{ + unsigned int i; + u64 fpr_val; + int err; + + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS; i++) { + fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); + err = user_regset_copyout(pos, count, kbuf, ubuf, + &fpr_val, i * sizeof(elf_fpreg_t), + (i + 1) * sizeof(elf_fpreg_t)); + if (err) + return err; + } + + return 0; +} + +/* + * Copy the floating-point context to the supplied NT_PRFPREG buffer. + * Choose the appropriate helper for general registers, and then copy + * the FCSR register separately. + */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - unsigned i; + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); int err; - u64 fpr_val; - /* XXX fcr31 */ + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf); + else + err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf); + if (err) + return err; - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fcr31, + fcr31_pos, fcr31_pos + sizeof(u32)); - for (i = 0; i < NUM_FPU_REGS; i++) { - fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); - err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fpr_val, i * sizeof(elf_fpreg_t), - (i + 1) * sizeof(elf_fpreg_t)); + return err; +} + +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context, + * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP + * context's general register slots. Only general registers are copied. + */ +static int fpr_set_fpa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + return user_regset_copyin(pos, count, kbuf, ubuf, + &target->thread.fpu, + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); +} + +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context, + * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64 + * bits only of FP context's general register slots. Only general + * registers are copied. + */ +static int fpr_set_msa(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + unsigned int i; + u64 fpr_val; + int err; + + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) { + err = user_regset_copyin(pos, count, kbuf, ubuf, + &fpr_val, i * sizeof(elf_fpreg_t), + (i + 1) * sizeof(elf_fpreg_t)); if (err) return err; + set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val); } return 0; } +/* + * Copy the supplied NT_PRFPREG buffer to the floating-point context. + * Choose the appropriate helper for general registers, and then copy + * the FCSR register separately. + * + * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0', + * which is supposed to have been guaranteed by the kernel before + * calling us, e.g. in `ptrace_regset'. We enforce that requirement, + * so that we can safely avoid preinitializing temporaries for + * partial register writes. + */ static int fpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - unsigned i; + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + u32 fcr31; int err; - u64 fpr_val; - /* XXX fcr31 */ + BUG_ON(count % sizeof(elf_fpreg_t)); + + if (pos + count > sizeof(elf_fpregset_t)) + return -EIO; init_fp_ctx(target); - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu, - 0, sizeof(elf_fpregset_t)); + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf); + else + err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf); + if (err) + return err; - BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); - for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) { + if (count > 0) { err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &fpr_val, i * sizeof(elf_fpreg_t), - (i + 1) * sizeof(elf_fpreg_t)); + &fcr31, + fcr31_pos, fcr31_pos + sizeof(u32)); if (err) return err; - set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val); + + ptrace_setfcr31(target, fcr31); } - return 0; + return err; } enum mips_regset { diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild index c94ee54210bc489efd469493800596cd2b7061a3..81271d3af47cb1000ebfab2539efa92080a1eccc 100644 --- a/arch/mn10300/include/uapi/asm/Kbuild +++ b/arch/mn10300/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += siginfo.h diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild index ffca24da7647b80e0f45728dff8da4e9474dc65f..13a3d77b4d7bdc487b814ae2933940638b62c759 100644 --- a/arch/nios2/include/uapi/asm/Kbuild +++ b/arch/nios2/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild index 62286dbeb9043c6ff6eecbd5859c23fd21ce1baa..130c16ccba0a0abb135e31275eb34c564d6cd700 100644 --- a/arch/openrisc/include/uapi/asm/Kbuild +++ b/arch/openrisc/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c index 9345b44b86f036572e33721eb80e9bbbe4493aa4..f57118e1f6b4265257799ae2cf8ea356077e20b9 100644 --- a/arch/parisc/boot/compressed/misc.c +++ b/arch/parisc/boot/compressed/misc.c @@ -123,8 +123,8 @@ int puts(const char *s) while ((nuline = strchr(s, '\n')) != NULL) { if (nuline != s) pdc_iodc_print(s, nuline - s); - pdc_iodc_print("\r\n", 2); - s = nuline + 1; + pdc_iodc_print("\r\n", 2); + s = nuline + 1; } if (*s != '\0') pdc_iodc_print(s, strlen(s)); diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index dd5a08aaa4da746ff09b19cf5f93e9d58489ff14..3eb4bfc1fb365478f11c9c87bfb3b3124fab567a 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -12,6 +12,7 @@ for the semaphore. */ #define __PA_LDCW_ALIGNMENT 16 +#define __PA_LDCW_ALIGN_ORDER 4 #define __ldcw_align(a) ({ \ unsigned long __ret = (unsigned long) &(a)->lock[0]; \ __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ @@ -29,6 +30,7 @@ ldcd). */ #define __PA_LDCW_ALIGNMENT 4 +#define __PA_LDCW_ALIGN_ORDER 2 #define __ldcw_align(a) (&(a)->slock) #define __LDCW "ldcw,co" diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index c980a02a52bc0dda0a23b205f59d1d86438553f2..598c8d60fa5e602cc9303e1986ada9680d64feb3 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -35,7 +35,12 @@ struct thread_info { /* thread information allocation */ +#ifdef CONFIG_IRQSTACKS +#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */ +#else #define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */ +#endif + /* Be sure to hunt all references to this down when you change the size of * the kernel stack */ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild index 196d2a4efb312be6d830fe2de80538d1f7aaf8f4..286ef5a5904b02d5f346dd783dbea9fdb6b35e70 100644 --- a/arch/parisc/include/uapi/asm/Kbuild +++ b/arch/parisc/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h +generic-y += bpf_perf_event.h generic-y += kvm_para.h generic-y += param.h generic-y += poll.h diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index d8f77358e2ba29746730f34ba652db327bb2f1d5..29b99b8964aa6c3171d7633ab9863014510330f5 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -870,7 +870,7 @@ static void print_parisc_device(struct parisc_device *dev) static int count; print_pa_hwpath(dev, hw_path); - printk(KERN_INFO "%d. %s at 0x%p [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", + printk(KERN_INFO "%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type, dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index a4fd296c958e8e14f13a913aca50510b11eb49b7..e95207c0565eb12308e12d48c45bd5309ae6e2ae 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,14 @@ #endif .import pa_tlb_lock,data + .macro load_pa_tlb_lock reg +#if __PA_LDCW_ALIGNMENT > 4 + load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg + depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg +#else + load32 PA(pa_tlb_lock), \reg +#endif + .endm /* space_to_prot macro creates a prot id from a space id */ @@ -457,7 +466,7 @@ .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f - load32 PA(pa_tlb_lock),\tmp + load_pa_tlb_lock \tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b nop @@ -480,7 +489,7 @@ /* Release pa_tlb_lock lock. */ .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load32 PA(pa_tlb_lock),\tmp + load_pa_tlb_lock \tmp tlb_unlock0 \spc,\tmp #endif .endm @@ -878,9 +887,6 @@ ENTRY_CFI(syscall_exit_rfi) STREG %r19,PT_SR7(%r16) intr_return: - /* NOTE: Need to enable interrupts incase we schedule. */ - ssm PSW_SM_I, %r0 - /* check for reschedule */ mfctl %cr30,%r1 LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */ @@ -907,6 +913,11 @@ intr_check_sig: LDREG PT_IASQ1(%r16), %r20 cmpib,COND(=),n 0,%r20,intr_restore /* backward */ + /* NOTE: We need to enable interrupts if we have to deliver + * signals. We used to do this earlier but it caused kernel + * stack overflows. */ + ssm PSW_SM_I, %r0 + copy %r0, %r25 /* long in_syscall = 0 */ #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ @@ -958,6 +969,10 @@ intr_do_resched: cmpib,COND(=) 0, %r20, intr_do_preempt nop + /* NOTE: We need to enable interrupts if we schedule. We used + * to do this earlier but it caused kernel stack overflows. */ + ssm PSW_SM_I, %r0 + #ifdef CONFIG_64BIT ldo -16(%r30),%r29 /* Reference param save area */ #endif diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S index e3a8e5e4d5de75897adcea4134f87c7246f60646..8d072c44f300c16d45ba8f4ee0c2eee6435e4ddd 100644 --- a/arch/parisc/kernel/hpmc.S +++ b/arch/parisc/kernel/hpmc.S @@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc) __INITRODATA + .align 4 .export os_hpmc_size os_hpmc_size: .word .os_hpmc_end-.os_hpmc diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index adf7187f89515ec69b19461f60ce379e552397dc..2d40c4ff3f6918ae9b2e2c6af71e20658a9850e1 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -36,6 +36,7 @@ #include #include #include +#include #include .text @@ -333,8 +334,12 @@ ENDPROC_CFI(flush_data_cache_local) .macro tlb_lock la,flags,tmp #ifdef CONFIG_SMP - ldil L%pa_tlb_lock,%r1 - ldo R%pa_tlb_lock(%r1),\la +#if __PA_LDCW_ALIGNMENT > 4 + load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la + depi 0,31,__PA_LDCW_ALIGN_ORDER, \la +#else + load32 pa_tlb_lock, \la +#endif rsm PSW_SM_I,\flags 1: LDCW 0(\la),\tmp cmpib,<>,n 0,\tmp,3f diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 30f92391a93ef6d5a90970b81921a2133d5e2eb0..cad3e8661cd6cf1895c8b69605dbdad730a4c8e6 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -183,6 +184,44 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r) return 1; } +/* + * Idle thread support + * + * Detect when running on QEMU with SeaBIOS PDC Firmware and let + * QEMU idle the host too. + */ + +int running_on_qemu __read_mostly; + +void __cpuidle arch_cpu_idle_dead(void) +{ + /* nop on real hardware, qemu will offline CPU. */ + asm volatile("or %%r31,%%r31,%%r31\n":::); +} + +void __cpuidle arch_cpu_idle(void) +{ + local_irq_enable(); + + /* nop on real hardware, qemu will idle sleep. */ + asm volatile("or %%r10,%%r10,%%r10\n":::); +} + +static int __init parisc_idle_init(void) +{ + const char *marker; + + /* check QEMU/SeaBIOS marker in PAGE0 */ + marker = (char *) &PAGE0->pad0; + running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0); + + if (!running_on_qemu) + cpu_idle_poll_ctrl(1); + + return 0; +} +arch_initcall(parisc_idle_init); + /* * Copy architecture-specific thread state */ diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 5a657986ebbf4bef7beff4e8c8d20f1343872347..143f90e2f9f3c631616d4af52f0fe3fa08f44af9 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/arch/parisc/lib/delay.c b/arch/parisc/lib/delay.c index 7eab4bb8abe630b14c54c3b457285b4228607dc6..66e506520505d8a3245d49d492831df5e3bbb42a 100644 --- a/arch/parisc/lib/delay.c +++ b/arch/parisc/lib/delay.c @@ -16,9 +16,7 @@ #include #include -#include #include - #include /* for mfctl() */ #include /* for boot_cpu_data */ diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 13f7854e0d49cc796d98a438cad64d76916d6bc9..48f41399fc0b8b63acd84d774a09ad2d0aba5086 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -631,11 +631,11 @@ void __init mem_init(void) mem_init_print_info(NULL); #ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */ printk("virtual kernel memory layout:\n" - " vmalloc : 0x%p - 0x%p (%4ld MB)\n" - " memory : 0x%p - 0x%p (%4ld MB)\n" - " .init : 0x%p - 0x%p (%4ld kB)\n" - " .data : 0x%p - 0x%p (%4ld kB)\n" - " .text : 0x%p - 0x%p (%4ld kB)\n", + " vmalloc : 0x%px - 0x%px (%4ld MB)\n" + " memory : 0x%px - 0x%px (%4ld MB)\n" + " .init : 0x%px - 0x%px (%4ld kB)\n" + " .data : 0x%px - 0x%px (%4ld kB)\n" + " .text : 0x%px - 0x%px (%4ld kB)\n", (void*)VMALLOC_START, (void*)VMALLOC_END, (VMALLOC_END - VMALLOC_START) >> 20, diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index a703452d67b62f63b455098e88988fcc0b3776c6..555e22d5e07f9e21c322af718df72bdf2da0dfa4 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -209,5 +209,11 @@ exc_##label##_book3e: ori r3,r3,vector_offset@l; \ mtspr SPRN_IVOR##vector_number,r3; +#define RFI_TO_KERNEL \ + rfi + +#define RFI_TO_USER \ + rfi + #endif /* _ASM_POWERPC_EXCEPTION_64E_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index b27205297e1d9ca5ca46db9e7189bf9187385803..7197b179c1b150ba819f13a1f7feb6c9a45da1f9 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -74,6 +74,59 @@ */ #define EX_R3 EX_DAR +/* + * Macros for annotating the expected destination of (h)rfid + * + * The nop instructions allow us to insert one or more instructions to flush the + * L1-D cache when returning to userspace or a guest. + */ +#define RFI_FLUSH_SLOT \ + RFI_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop + +#define RFI_TO_KERNEL \ + rfid + +#define RFI_TO_USER \ + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback + +#define RFI_TO_USER_OR_KERNEL \ + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback + +#define RFI_TO_GUEST \ + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback + +#define HRFI_TO_KERNEL \ + hrfid + +#define HRFI_TO_USER \ + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback + +#define HRFI_TO_USER_OR_KERNEL \ + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback + +#define HRFI_TO_GUEST \ + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback + +#define HRFI_TO_UNKNOWN \ + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback + #ifdef CONFIG_RELOCATABLE #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ @@ -218,7 +271,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtspr SPRN_##h##SRR0,r12; \ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ mtspr SPRN_##h##SRR1,r10; \ - h##rfid; \ + h##RFI_TO_KERNEL; \ b . /* prevent speculative execution */ #define EXCEPTION_PROLOG_PSERIES_1(label, h) \ __EXCEPTION_PROLOG_PSERIES_1(label, h) @@ -232,7 +285,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtspr SPRN_##h##SRR0,r12; \ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ mtspr SPRN_##h##SRR1,r10; \ - h##rfid; \ + h##RFI_TO_KERNEL; \ b . /* prevent speculative execution */ #define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 8f88f771cc55ce982c11599b273e584babd38600..1e82eb3caabd19c69289957da188b563d0bcd0d6 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -187,7 +187,20 @@ label##3: \ FTR_ENTRY_OFFSET label##1b-label##3b; \ .popsection; +#define RFI_FLUSH_FIXUP_SECTION \ +951: \ + .pushsection __rfi_flush_fixup,"a"; \ + .align 2; \ +952: \ + FTR_ENTRY_OFFSET 951b-952b; \ + .popsection; + + #ifndef __ASSEMBLY__ +#include + +extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; + void apply_feature_fixups(void); void setup_feature_keys(void); #endif diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index a409177be8bdfd5f717af6111700d088fb91e61b..f0461618bf7bee95ffd27874e33501bd41ef80a4 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -241,6 +241,7 @@ #define H_GET_HCA_INFO 0x1B8 #define H_GET_PERF_COUNT 0x1BC #define H_MANAGE_TRACE 0x1C0 +#define H_GET_CPU_CHARACTERISTICS 0x1C8 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 #define H_QUERY_INT_STATE 0x1E4 #define H_POLL_PENDING 0x1D8 @@ -330,6 +331,17 @@ #define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 /* >= 0 values are CPU number */ +/* H_GET_CPU_CHARACTERISTICS return values */ +#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0 +#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1 +#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 +#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 +#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 + +#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 +#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 + /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 #define PROC_TABLE_DEREG 0x10 @@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc) } } +struct h_cpu_char_result { + u64 character; + u64 behaviour; +}; + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 73b92017b6d7b5231cd8c9d46b7986bb9131f12c..cd2fc1cc1cc7c056255b6f49effefb1e6d0cc1e6 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -76,6 +76,7 @@ struct machdep_calls { void __noreturn (*restart)(char *cmd); void __noreturn (*halt)(void); + void (*panic)(char *str); void (*cpu_die)(void); long (*time_init)(void); /* Optional, may be NULL */ diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 6177d43f0ce8afa9c1f6a1101e92ba161e47d97a..e2a2b8400490049143edee40316313a906ca6db7 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, #endif } -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } #ifndef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 3892db93b8374e1f1256a6b497d384f3faedc06e..23ac7fc0af23b6cae8f4706665102dd7e2050667 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -232,6 +232,16 @@ struct paca_struct { struct sibling_subcore_state *sibling_subcore_state; #endif #endif +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * rfi fallback flush must be in its own cacheline to prevent + * other paca data leaking into the L1d + */ + u64 exrfi[EX_SIZE] __aligned(0x80); + void *rfi_flush_fallback_area; + u64 l1d_flush_congruence; + u64 l1d_flush_sets; +#endif }; extern void copy_mm_to_paca(struct mm_struct *mm); diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 7f01b22fa6cb0d0895c914423a98cb2361fb2a16..55eddf50d1498020ba7f17216c689ab89b84f3c7 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu) return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); } +static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf); + if (rc == H_SUCCESS) { + p->character = retbuf[0]; + p->behaviour = retbuf[1]; + } + + return rc; +} + #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 257d23dbf55dce902644334280e9d72b3ff91ba8..469b7fdc9be41cd9ab0ceb7f50c2b633c19f01a4 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -24,6 +24,7 @@ extern void reloc_got2(unsigned long); void check_for_initrd(void); void initmem_init(void); +void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 #ifdef CONFIG_PPC_PSERIES @@ -38,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {} static inline void pseries_little_endian_exceptions(void) {} #endif /* CONFIG_PPC_PSERIES */ +void rfi_flush_enable(bool enable); + +/* These are bit flags */ +enum l1d_flush_type { + L1D_FLUSH_NONE = 0x1, + L1D_FLUSH_FALLBACK = 0x2, + L1D_FLUSH_ORI = 0x4, + L1D_FLUSH_MTTRIG = 0x8, +}; + +void __init setup_rfi_flush(enum l1d_flush_type, bool enable); +void do_rfi_flush_fixups(enum l1d_flush_type types); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index 0d960ef78a9a95a682fe17d8e5050e3803a57323..1a6ed5919ffdb13878ab7f4a8c2f958c8a41f166 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += param.h generic-y += poll.h generic-y += resource.h diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6b958414b4e036ac1e4c97bceb61277ffab65e76..f390d57cf2e1a711335bbd66cf7819e9dcd8442f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -237,6 +237,11 @@ int main(void) OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp); OFFSET(PACA_IN_MCE, paca_struct, in_mce); OFFSET(PACA_IN_NMI, paca_struct, in_nmi); + OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area); + OFFSET(PACA_EXRFI, paca_struct, exrfi); + OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence); + OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets); + #endif OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 610955fe8b81c528420cc43fec3c92cf80924763..679bbe714e8561b8c259f37bbfc9db8264555554 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -102,6 +102,7 @@ _GLOBAL(__setup_cpu_power9) li r0,0 mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 + mtspr SPRN_PID,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 @@ -126,6 +127,7 @@ _GLOBAL(__restore_cpu_power9) li r0,0 mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 + mtspr SPRN_PID,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3320bcac71928eeaf85b884076e90767be4d264f..2748584b767da3f5d788c0be27b27662053853e4 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -37,6 +37,11 @@ #include #include #include +#ifdef CONFIG_PPC_BOOK3S +#include +#else +#include +#endif /* * System calls. @@ -262,13 +267,23 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ + ld r2,GPR2(r1) + ld r1,GPR1(r1) + mtlr r4 + mtcr r5 + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r8 + RFI_TO_USER + b . /* prevent speculative execution */ + + /* exit to kernel */ 1: ld r2,GPR2(r1) ld r1,GPR1(r1) mtlr r4 mtcr r5 mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - RFI + RFI_TO_KERNEL b . /* prevent speculative execution */ .Lsyscall_error: @@ -397,8 +412,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtmsrd r10, 1 mtspr SPRN_SRR0, r11 mtspr SPRN_SRR1, r12 - - rfid + RFI_TO_USER b . /* prevent speculative execution */ #endif _ASM_NOKPROBE_SYMBOL(system_call_common); @@ -878,7 +892,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) -1: + mtspr SPRN_SRR1,r3 ld r2,_CCR(r1) @@ -891,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r3,GPR3(r1) ld r4,GPR4(r1) ld r1,GPR1(r1) + RFI_TO_USER + b . /* prevent speculative execution */ - rfid +1: mtspr SPRN_SRR1,r3 + + ld r2,_CCR(r1) + mtcrf 0xFF,r2 + ld r2,_NIP(r1) + mtspr SPRN_SRR0,r2 + + ld r0,GPR0(r1) + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r1,GPR1(r1) + RFI_TO_KERNEL b . /* prevent speculative execution */ #endif /* CONFIG_PPC_BOOK3E */ @@ -1073,7 +1101,7 @@ __enter_rtas: mtspr SPRN_SRR0,r5 mtspr SPRN_SRR1,r6 - rfid + RFI_TO_KERNEL b . /* prevent speculative execution */ rtas_return_loc: @@ -1098,7 +1126,7 @@ rtas_return_loc: mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - rfid + RFI_TO_KERNEL b . /* prevent speculative execution */ _ASM_NOKPROBE_SYMBOL(__enter_rtas) _ASM_NOKPROBE_SYMBOL(rtas_return_loc) @@ -1171,7 +1199,7 @@ _GLOBAL(enter_prom) LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE) andc r11,r11,r12 mtsrr1 r11 - rfid + RFI_TO_KERNEL #endif /* CONFIG_PPC_BOOK3E */ 1: /* Return from OF */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e441b469dc8f61f6381c7c95b086b677004d401b..2dc10bf646b887b51dc2dc28feeb4aac6d9fc00d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -256,7 +256,7 @@ BEGIN_FTR_SECTION LOAD_HANDLER(r12, machine_check_handle_early) 1: mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r11 - rfid + RFI_TO_KERNEL b . /* prevent speculative execution */ 2: /* Stack overflow. Stay on emergency stack and panic. @@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) li r3,MSR_ME andc r10,r10,r3 /* Turn off MSR_ME */ mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . 2: /* @@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) */ bl machine_check_queue_event MACHINE_CHECK_HANDLER_WINDUP - rfid + RFI_TO_USER_OR_KERNEL 9: /* Deliver the machine check to host kernel in V mode. */ MACHINE_CHECK_HANDLER_WINDUP @@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common) stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + andi. r9,r11,MSR_PR // Check for exception from userspace + cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later + /* * Test MSR_RI before calling slb_allocate_realmode, because the * MSR in r11 gets clobbered. However we still want to allocate @@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) /* All done -- return from exception. */ + bne cr4,1f /* returning to kernel */ + .machine push .machine "power4" mtcrf 0x80,r9 + mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ mtcrf 0x02,r9 /* I/D indication is in cr6 */ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ @@ -640,9 +646,30 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) ld r11,PACA_EXSLB+EX_R11(r13) ld r12,PACA_EXSLB+EX_R12(r13) ld r13,PACA_EXSLB+EX_R13(r13) - rfid + RFI_TO_USER + b . /* prevent speculative execution */ +1: +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ + mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ + mtcrf 0x02,r9 /* I/D indication is in cr6 */ + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + + RESTORE_CTR(r9, PACA_EXSLB) + RESTORE_PPR_PACA(PACA_EXSLB, r9) + mr r3,r12 + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + RFI_TO_KERNEL b . /* prevent speculative execution */ + 2: std r3,PACA_EXSLB+EX_DAR(r13) mr r3,r12 mfspr r11,SPRN_SRR0 @@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . 8: std r3,PACA_EXSLB+EX_DAR(r13) @@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . EXC_COMMON_BEGIN(unrecov_slb) @@ -901,7 +928,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) mtspr SPRN_SRR0,r10 ; \ ld r10,PACAKMSR(r13) ; \ mtspr SPRN_SRR1,r10 ; \ - rfid ; \ + RFI_TO_KERNEL ; \ b . ; /* prevent speculative execution */ #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH @@ -917,7 +944,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ xori r12,r12,MSR_LE ; \ mtspr SPRN_SRR1,r12 ; \ mr r13,r9 ; \ - rfid ; /* return to userspace */ \ + RFI_TO_USER ; /* return to userspace */ \ b . ; /* prevent speculative execution */ #else #define SYSCALL_FASTENDIAN_TEST @@ -1063,7 +1090,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) mtcr r11 REST_GPR(11, r1) ld r1,GPR1(r1) - hrfid + HRFI_TO_USER_OR_KERNEL 1: mtcr r11 REST_GPR(11, r1) @@ -1314,7 +1341,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) ld r13,PACA_EXGEN+EX_R13(r13) - HRFID + HRFI_TO_UNKNOWN b . #endif @@ -1418,10 +1445,94 @@ masked_##_H##interrupt: \ ld r10,PACA_EXGEN+EX_R10(r13); \ ld r11,PACA_EXGEN+EX_R11(r13); \ /* returns to kernel where r13 must be set up, so don't restore it */ \ - ##_H##rfid; \ + ##_H##RFI_TO_KERNEL; \ b .; \ MASKED_DEC_HANDLER(_H) +TRAMP_REAL_BEGIN(rfi_flush_fallback) + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + rfid + +TRAMP_REAL_BEGIN(hrfi_flush_fallback) + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + hrfid + /* * Real mode exceptions actually use this too, but alternate * instruction code patches (which end up in the common .text area) @@ -1441,7 +1552,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) addi r13, r13, 4 mtspr SPRN_SRR0, r13 GET_SCRATCH0(r13) - rfid + RFI_TO_KERNEL b . TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) @@ -1453,7 +1564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) addi r13, r13, 4 mtspr SPRN_HSRR0, r13 GET_SCRATCH0(r13) - hrfid + HRFI_TO_KERNEL b . #endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 04ea5c04fd24825a6c6affc4d7d6956b30321b84..3c2c2688918fffdcfbdbb64e2ec2f1b0dfe1ccf9 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1462,25 +1462,6 @@ static void fadump_init_files(void) return; } -static int fadump_panic_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - /* - * If firmware-assisted dump has been registered then trigger - * firmware-assisted dump and let firmware handle everything - * else. If this returns, then fadump was not registered, so - * go through the rest of the panic path. - */ - crash_fadump(NULL, ptr); - - return NOTIFY_DONE; -} - -static struct notifier_block fadump_panic_block = { - .notifier_call = fadump_panic_event, - .priority = INT_MIN /* may not return; must be done last */ -}; - /* * Prepare for firmware-assisted dump. */ @@ -1513,9 +1494,6 @@ int __init setup_fadump(void) init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); fadump_init_files(); - atomic_notifier_chain_register(&panic_notifier_list, - &fadump_panic_block); - return 1; } subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5acb5a176dbe5c8bffe6ddb7458b7d3ac2b7019f..72be0c32e902a35fa45e5ed02036df91999dda58 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs) printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); - printk("REGS: %p TRAP: %04lx %s (%s)\n", + printk("REGS: %px TRAP: %04lx %s (%s)\n", regs, regs->trap, print_tainted(), init_utsname()->release); printk("MSR: "REG" ", regs->msr); print_msr_bits(regs->msr); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2075322cd22522edf7de78e32ef2679fae8e9913..9d213542a48bb91360b7b540ef429473494ebe98 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -704,6 +704,30 @@ int check_legacy_ioport(unsigned long base_port) } EXPORT_SYMBOL(check_legacy_ioport); +static int ppc_panic_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + /* + * If firmware-assisted dump has been registered then trigger + * firmware-assisted dump and let firmware handle everything else. + */ + crash_fadump(NULL, ptr); + ppc_md.panic(ptr); /* May not return */ + return NOTIFY_DONE; +} + +static struct notifier_block ppc_panic_block = { + .notifier_call = ppc_panic_event, + .priority = INT_MIN /* may not return; must be done last */ +}; + +void __init setup_panic(void) +{ + if (!ppc_md.panic) + return; + atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block); +} + #ifdef CONFIG_CHECK_CACHE_COHERENCY /* * For platforms that have configurable cache-coherency. This function @@ -848,6 +872,9 @@ void __init setup_arch(char **cmdline_p) /* Probe the machine type, establish ppc_md. */ probe_machine(); + /* Setup panic notifier if requested by the platform. */ + setup_panic(); + /* * Configure ppc_md.power_save (ppc32 only, 64-bit machines do * it from their respective probe() function. diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8956a9856604e72634dd11ef416ec9788660bc37..491be4179ddd989fed6cba1e63963bf47653f4ba 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -801,3 +801,104 @@ static int __init disable_hardlockup_detector(void) return 0; } early_initcall(disable_hardlockup_detector); + +#ifdef CONFIG_PPC_BOOK3S_64 +static enum l1d_flush_type enabled_flush_types; +static void *l1d_flush_fallback_area; +static bool no_rfi_flush; +bool rfi_flush; + +static int __init handle_no_rfi_flush(char *p) +{ + pr_info("rfi-flush: disabled on command line."); + no_rfi_flush = true; + return 0; +} +early_param("no_rfi_flush", handle_no_rfi_flush); + +/* + * The RFI flush is not KPTI, but because users will see doco that says to use + * nopti we hijack that option here to also disable the RFI flush. + */ +static int __init handle_no_pti(char *p) +{ + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); + handle_no_rfi_flush(NULL); + return 0; +} +early_param("nopti", handle_no_pti); + +static void do_nothing(void *unused) +{ + /* + * We don't need to do the flush explicitly, just enter+exit kernel is + * sufficient, the RFI exit handlers will do the right thing. + */ +} + +void rfi_flush_enable(bool enable) +{ + if (rfi_flush == enable) + return; + + if (enable) { + do_rfi_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else + do_rfi_flush_fixups(L1D_FLUSH_NONE); + + rfi_flush = enable; +} + +static void init_fallback_flush(void) +{ + u64 l1d_size, limit; + int cpu; + + l1d_size = ppc64_caches.l1d.size; + limit = min(safe_stack_limit(), ppc64_rma_size); + + /* + * Align to L1d size, and size it at 2x L1d size, to catch possible + * hardware prefetch runoff. We don't have a recipe for load patterns to + * reliably avoid the prefetcher. + */ + l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); + memset(l1d_flush_fallback_area, 0, l1d_size * 2); + + for_each_possible_cpu(cpu) { + /* + * The fallback flush is currently coded for 8-way + * associativity. Different associativity is possible, but it + * will be treated as 8-way and may not evict the lines as + * effectively. + * + * 128 byte lines are mandatory. + */ + u64 c = l1d_size / 8; + + paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; + paca[cpu].l1d_flush_congruence = c; + paca[cpu].l1d_flush_sets = c / 128; + } +} + +void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) +{ + if (types & L1D_FLUSH_FALLBACK) { + pr_info("rfi-flush: Using fallback displacement flush\n"); + init_fallback_flush(); + } + + if (types & L1D_FLUSH_ORI) + pr_info("rfi-flush: Using ori type flush\n"); + + if (types & L1D_FLUSH_MTTRIG) + pr_info("rfi-flush: Using mttrig type flush\n"); + + enabled_flush_types = types; + + if (!no_rfi_flush) + rfi_flush_enable(enable); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 0494e1566ee2ab9b976cb0d9edb8c195a4490a23..307843d23682a79f0e4d9ae0cdb86a219e0b6e6a 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -132,6 +132,15 @@ SECTIONS /* Read-only data */ RO_DATA(PAGE_SIZE) +#ifdef CONFIG_PPC64 + . = ALIGN(8); + __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { + __start___rfi_flush_fixup = .; + *(__rfi_flush_fixup) + __stop___rfi_flush_fixup = .; + } +#endif + EXCEPTION_TABLE(0) NOTES :kernel :notes diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 29ebe2fd58674c59f27803a5426e4d2414f39418..a93d719edc906887b0f4bf412b2b76ac04babfec 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, gpte->may_read = true; gpte->may_write = true; gpte->page_size = MMU_PAGE_4K; + gpte->wimg = HPTE_R_M; return 0; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 966097232d2147bbcd79df41354a5f973fb9b7c1..b73dbc9e797da76bf4305a73972c4af925fbd70d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -65,11 +65,17 @@ struct kvm_resize_hpt { u32 order; /* These fields protected by kvm->lock */ + + /* Possible values and their usage: + * <0 an error occurred during allocation, + * -EBUSY allocation is in the progress, + * 0 allocation made successfuly. + */ int error; - bool prepare_done; - /* Private to the work thread, until prepare_done is true, - * then protected by kvm->resize_hpt_sem */ + /* Private to the work thread, until error != -EBUSY, + * then protected by kvm->lock. + */ struct kvm_hpt_info hpt; }; @@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) * Reset all the reverse-mapping chains for all memslots */ kvmppc_rmap_reset(kvm); - /* Ensure that each vcpu will flush its TLB on next entry. */ - cpumask_setall(&kvm->arch.need_tlb_flush); err = 0; goto out; } @@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order) kvmppc_set_hpt(kvm, &info); out: + if (err == 0) + /* Ensure that each vcpu will flush its TLB on next entry. */ + cpumask_setall(&kvm->arch.need_tlb_flush); + mutex_unlock(&kvm->lock); return err; } @@ -1413,16 +1421,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize) static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize) { - BUG_ON(kvm->arch.resize_hpt != resize); + if (WARN_ON(!mutex_is_locked(&kvm->lock))) + return; if (!resize) return; - if (resize->hpt.virt) - kvmppc_free_hpt(&resize->hpt); + if (resize->error != -EBUSY) { + if (resize->hpt.virt) + kvmppc_free_hpt(&resize->hpt); + kfree(resize); + } - kvm->arch.resize_hpt = NULL; - kfree(resize); + if (kvm->arch.resize_hpt == resize) + kvm->arch.resize_hpt = NULL; } static void resize_hpt_prepare_work(struct work_struct *work) @@ -1431,17 +1443,41 @@ static void resize_hpt_prepare_work(struct work_struct *work) struct kvm_resize_hpt, work); struct kvm *kvm = resize->kvm; - int err; + int err = 0; - resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", - resize->order); - - err = resize_hpt_allocate(resize); + if (WARN_ON(resize->error != -EBUSY)) + return; mutex_lock(&kvm->lock); + /* Request is still current? */ + if (kvm->arch.resize_hpt == resize) { + /* We may request large allocations here: + * do not sleep with kvm->lock held for a while. + */ + mutex_unlock(&kvm->lock); + + resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", + resize->order); + + err = resize_hpt_allocate(resize); + + /* We have strict assumption about -EBUSY + * when preparing for HPT resize. + */ + if (WARN_ON(err == -EBUSY)) + err = -EINPROGRESS; + + mutex_lock(&kvm->lock); + /* It is possible that kvm->arch.resize_hpt != resize + * after we grab kvm->lock again. + */ + } + resize->error = err; - resize->prepare_done = true; + + if (kvm->arch.resize_hpt != resize) + resize_hpt_release(kvm, resize); mutex_unlock(&kvm->lock); } @@ -1466,14 +1502,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, if (resize) { if (resize->order == shift) { - /* Suitable resize in progress */ - if (resize->prepare_done) { - ret = resize->error; - if (ret != 0) - resize_hpt_release(kvm, resize); - } else { + /* Suitable resize in progress? */ + ret = resize->error; + if (ret == -EBUSY) ret = 100; /* estimated time in ms */ - } + else if (ret) + resize_hpt_release(kvm, resize); goto out; } @@ -1493,6 +1527,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, ret = -ENOMEM; goto out; } + + resize->error = -EBUSY; resize->order = shift; resize->kvm = kvm; INIT_WORK(&resize->work, resize_hpt_prepare_work); @@ -1547,16 +1583,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm, if (!resize || (resize->order != shift)) goto out; - ret = -EBUSY; - if (!resize->prepare_done) - goto out; - ret = resize->error; - if (ret != 0) + if (ret) goto out; ret = resize_hpt_rehash(resize); - if (ret != 0) + if (ret) goto out; resize_hpt_pivot(resize); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 2659844784b817d7ca77e6e95ecb9418f430e62d..9c61f736c75b2d0761ec4f9d2e385df75b6dc882 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline) mtmsrd r0,1 /* clear RI in MSR */ mtsrr0 r5 mtsrr1 r6 - RFI + RFI_TO_KERNEL kvmppc_call_hv_entry: BEGIN_FTR_SECTION @@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtmsrd r6, 1 /* Clear RI in MSR */ mtsrr0 r8 mtsrr1 r7 - RFI + RFI_TO_KERNEL /* Virtual-mode return */ .Lvirt_return: @@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r0, VCPU_GPR(R0)(r4) ld r4, VCPU_GPR(R4)(r4) - - hrfid + HRFI_TO_GUEST b . secondary_too_late: @@ -3320,7 +3319,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) ld r4, PACAKMSR(r13) mtspr SPRN_SRR0, r3 mtspr SPRN_SRR1, r4 - rfid + RFI_TO_KERNEL 9: addi r3, r1, STACK_FRAME_OVERHEAD bl kvmppc_bad_interrupt b 9b diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d0dc8624198f8e4663800c4ca603aea98d4ba128..7deaeeb14b9358d8a4e6f1107cd42bba0605a264 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); #define MSR_USER32 MSR_USER #define MSR_USER64 MSR_USER #define HW_PAGE_SIZE PAGE_SIZE +#define HPTE_R_M _PAGE_COHERENT #endif static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu) @@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.eaddr = eaddr; pte.vpage = eaddr >> 12; pte.page_size = MMU_PAGE_64K; + pte.wimg = HPTE_R_M; } switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) { diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 42a4b237df5f5731a4f6964feafc4db2835ffdee..34a5adeff0840662e8eae4553b008069bfd9426d 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -46,6 +46,9 @@ #define FUNC(name) name +#define RFI_TO_KERNEL RFI +#define RFI_TO_GUEST RFI + .macro INTERRUPT_TRAMPOLINE intno .global kvmppc_trampoline_\intno @@ -141,7 +144,7 @@ kvmppc_handler_skip_ins: GET_SCRATCH0(r13) /* And get back into the code */ - RFI + RFI_TO_KERNEL #endif /* @@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline) ori r5, r5, MSR_EE mtsrr0 r7 mtsrr1 r6 - RFI + RFI_TO_KERNEL #include "book3s_segment.S" diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 2a2b96d5399917398312dacb7c7b2846483a7fd4..93a180ceefad03343d1f9064420ee00ca54973d7 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -156,7 +156,7 @@ no_dcbz32_on: PPC_LL r9, SVCPU_R9(r3) PPC_LL r3, (SVCPU_R3)(r3) - RFI + RFI_TO_GUEST kvmppc_handler_trampoline_enter_end: @@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) cmpwi r12, BOOK3S_INTERRUPT_DOORBELL beqa BOOK3S_INTERRUPT_DOORBELL - RFI + RFI_TO_KERNEL kvmppc_handler_trampoline_exit_end: diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index bf457843e03217b9aa02815d7791f0fce72aea2b..0d750d274c4e21a3324eb3505bbd73c86a58cdc9 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) /* Return the per-cpu state for state saving/migration */ return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | - (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT; + (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT | + (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT; } int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) @@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* * Restore P and Q. If the interrupt was pending, we - * force both P and Q, which will trigger a resend. + * force Q and !P, which will trigger a resend. * * That means that a guest that had both an interrupt * pending (queued) and Q set will restore with only @@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) * is perfectly fine as coalescing interrupts that haven't * been presented yet is always allowed. */ - if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) + if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING)) state->old_p = true; if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING) state->old_q = true; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 41cf5ae273cf74a2747d13b9da2274b8eb2054cb..a95ea007d654d5db2b78d811a4ef21a750a95609 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +#ifdef CONFIG_PPC_BOOK3S_64 +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___rfi_flush_fixup), + end = PTRRELOC(&__stop___rfi_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + if (types & L1D_FLUSH_FALLBACK) + /* b .+16 to fallback flush */ + instrs[0] = 0x48000010; + + i = 0; + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + patch_instruction(dest + 1, instrs[1]); + patch_instruction(dest + 2, instrs[2]); + } + + printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ + void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 4797d08581cec347b6cf34a316c1ef585209653c..6e1e3903538065becbab2ad47febad597f5d6d49 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -145,6 +145,11 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address) return __bad_area(regs, address, SEGV_MAPERR); } +static noinline int bad_access(struct pt_regs *regs, unsigned long address) +{ + return __bad_area(regs, address, SEGV_ACCERR); +} + static int do_sigbus(struct pt_regs *regs, unsigned long address, unsigned int fault) { @@ -490,7 +495,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, good_area: if (unlikely(access_error(is_write, is_exec, vma))) - return bad_area(regs, address); + return bad_access(regs, address); /* * If for any reason at all we couldn't handle the fault, diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 46d74e81aff1b4caad4769e7686fa0a800695cd4..d183b4801bdbded832b90d2aa1a18e713f70695b 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -763,7 +763,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, func = (u8 *) __bpf_call_base + imm; /* Save skb pointer if we need to re-cache skb data */ - if (bpf_helper_changes_pkt_data(func)) + if ((ctx->seen & SEEN_SKB) && + bpf_helper_changes_pkt_data(func)) PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_func_call(image, ctx, (u64)func); @@ -772,7 +773,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_MR(b2p[BPF_REG_0], 3); /* refresh skb cache */ - if (bpf_helper_changes_pkt_data(func)) { + if ((ctx->seen & SEEN_SKB) && + bpf_helper_changes_pkt_data(func)) { /* reload skb pointer to r3 */ PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_skb_loads(image, ctx); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 9e3da168d54cdcd36e3911ff040ff2ad187c92b7..fce545774d50afc6093c28ad2f4127c24ed5331c 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr) int ret; __u64 target; - if (is_kernel_addr(addr)) - return branch_target((unsigned int *)addr); + if (is_kernel_addr(addr)) { + if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + return 0; + + return branch_target(&instr); + } /* Userspace: need copy instruction here then translate it */ pagefault_disable(); @@ -1415,7 +1419,7 @@ static int collect_events(struct perf_event *group, int max_count, int n = 0; struct perf_event *event; - if (!is_software_event(group)) { + if (group->pmu->task_ctx_nr == perf_hw_context) { if (n >= max_count) return -1; ctrs[n] = group; @@ -1423,7 +1427,7 @@ static int collect_events(struct perf_event *group, int max_count, events[n++] = group->hw.config; } list_for_each_entry(event, &group->sibling_list, group_entry) { - if (!is_software_event(event) && + if (event->pmu->task_ctx_nr == perf_hw_context && event->state != PERF_EVENT_STATE_OFF) { if (n >= max_count) return -1; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 0ead3cd73caa2f8816e8c04f47cca691efba0560..be4e7f84f70a59db60e92a9bfe845678f71cc608 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -309,6 +309,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask)) return 0; + /* + * Check whether nest_imc is registered. We could end up here if the + * cpuhotplug callback registration fails. i.e, callback invokes the + * offline path for all successfully registered nodes. At this stage, + * nest_imc pmu will not be registered and we should return here. + * + * We return with a zero since this is not an offline failure. And + * cpuhp_setup_state() returns the actual failure reason to the caller, + * which in turn will call the cleanup routine. + */ + if (!nest_pmus) + return 0; + /* * Now that this cpu is one of the designated, * find a next cpu a) which is online and b) in same chip. @@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); } if (nest_pmus > 0) @@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); kfree(pmu_ptr); - kfree(per_nest_pmu_arr); return; } @@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id ret = nest_pmu_cpumask_init(); if (ret) { mutex_unlock(&nest_init_lock); + kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); goto err_free; } } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 1edfbc1e40f4387b30dd5c0a9491935460feaf92..4fb21e17504aad72295a9e1cdffe54c5547379bd 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -37,13 +37,62 @@ #include #include #include +#include #include "powernv.h" +static void pnv_setup_rfi_flush(void) +{ + struct device_node *np, *fw_features; + enum l1d_flush_type type; + int enable; + + /* Default to fallback in case fw-features are not available */ + type = L1D_FLUSH_FALLBACK; + enable = 1; + + np = of_find_node_by_name(NULL, "ibm,opal"); + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + + if (fw_features) { + np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_MTTRIG; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_ORI; + + of_node_put(np); + + /* Enable unless firmware says NOT to */ + enable = 2; + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + of_node_put(fw_features); + } + + setup_rfi_flush(type, enable > 0); +} + static void __init pnv_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + pnv_setup_rfi_flush(); + /* Initialize SMP */ pnv_smp_init(); diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 9dabea6e14439647b726d007a63de63a853ac3d0..6244bc849469e33af7dcc5a4ac2b21dd970ac1c6 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -104,6 +104,20 @@ static void __noreturn ps3_halt(void) ps3_sys_manager_halt(); /* never returns */ } +static void ps3_panic(char *str) +{ + DBG("%s:%d %s\n", __func__, __LINE__, str); + + smp_send_stop(); + printk("\n"); + printk(" System does not reboot automatically.\n"); + printk(" Please press POWER button.\n"); + printk("\n"); + + while(1) + lv1_pause(1); +} + #if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \ defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE) static void __init prealloc(struct ps3_prealloc *p) @@ -255,6 +269,7 @@ define_machine(ps3) { .probe = ps3_probe, .setup_arch = ps3_setup_arch, .init_IRQ = ps3_init_IRQ, + .panic = ps3_panic, .get_boot_time = ps3_get_boot_time, .set_dabr = ps3_set_dabr, .calibrate_decr = ps3_calibrate_decr, diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 6e35780c5962f25144b0181414c03a017840478f..a0b20c03f078cc41b9ad126fbf2603a4ffb68463 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr, static CLASS_ATTR_RW(dlpar); -static int __init pseries_dlpar_init(void) +int __init dlpar_workqueue_init(void) { + if (pseries_hp_wq) + return 0; + pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue", - WQ_UNBOUND, 1); + WQ_UNBOUND, 1); + + return pseries_hp_wq ? 0 : -ENOMEM; +} + +static int __init dlpar_sysfs_init(void) +{ + int rc; + + rc = dlpar_workqueue_init(); + if (rc) + return rc; + return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr); } -machine_device_initcall(pseries, pseries_dlpar_init); +machine_device_initcall(pseries, dlpar_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 4470a3194311e06e040c624b8b1491eb0e071298..1ae1d9f4dbe99935130971cdc390d8f581ad788e 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void) return CMO_PageSize; } +int dlpar_workqueue_init(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 4923ffe230cf926565c0ed4c9a339a822b073c15..81d8614e73790b1923a3c8cfd336a2531fee76ce 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void) /* Hotplug Events */ np = of_find_node_by_path("/event-sources/hot-plug-events"); if (np != NULL) { - request_event_sources_irqs(np, ras_hotplug_interrupt, + if (dlpar_workqueue_init() == 0) + request_event_sources_irqs(np, ras_hotplug_interrupt, "RAS_HOTPLUG"); of_node_put(np); } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 5f1beb8367acaa4562dbc647ad06a1f4c12a8f72..ae4f596273b51a836e5d27307f81bfe6438590bf 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void) of_pci_check_probe_only(); } +static void pseries_setup_rfi_flush(void) +{ + struct h_cpu_char_result result; + enum l1d_flush_type types; + bool enable; + long rc; + + /* Enable by default */ + enable = true; + + rc = plpar_get_cpu_characteristics(&result); + if (rc == H_SUCCESS) { + types = L1D_FLUSH_NONE; + + if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) + types |= L1D_FLUSH_MTTRIG; + if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) + types |= L1D_FLUSH_ORI; + + /* Use fallback if nothing set in hcall */ + if (types == L1D_FLUSH_NONE) + types = L1D_FLUSH_FALLBACK; + + if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) + enable = false; + } else { + /* Default to fallback if case hcall is not available */ + types = L1D_FLUSH_FALLBACK; + } + + setup_rfi_flush(types, enable); +} + static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); + pseries_setup_rfi_flush(); + /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); @@ -726,6 +761,7 @@ define_machine(pseries) { .pcibios_fixup = pSeries_final_fixup, .restart = rtas_restart, .halt = rtas_halt, + .panic = rtas_os_term, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, .set_rtc_time = rtas_set_rtc_time, diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 44cbf4c12ea137562f02758aab4e2e604f54e96f..df95102e732cb466911b32632fd53e3f3369bf54 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev) } static struct lock_class_key fsl_msi_irq_class; +static struct lock_class_key fsl_msi_irq_request_class; static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, int offset, int irq_index) @@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, dev_err(&dev->dev, "No memory for MSI cascade data\n"); return -ENOMEM; } - irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); + irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class, + &fsl_msi_irq_request_class); cascade_data->index = offset; cascade_data->msi_data = msi; cascade_data->virq = virt_msir; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 1b2d8cb49abb2e8ea209a511bd239dd58e710661..cab24f549e7cbdc8786c5fa5b7450f0dcda8d87c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1590,7 +1590,7 @@ static void print_bug_trap(struct pt_regs *regs) printf("kernel BUG at %s:%u!\n", bug->file, bug->line); #else - printf("kernel BUG at %p!\n", (void *)bug->bug_addr); + printf("kernel BUG at %px!\n", (void *)bug->bug_addr); #endif #endif /* CONFIG_BUG */ } @@ -2329,7 +2329,7 @@ static void dump_one_paca(int cpu) p = &paca[cpu]; - printf("paca for cpu 0x%x @ %p:\n", cpu, p); + printf("paca for cpu 0x%x @ %px:\n", cpu, p); printf(" %-*s = %s\n", 20, "possible", cpu_possible(cpu) ? "yes" : "no"); printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? "yes" : "no"); @@ -2945,7 +2945,7 @@ static void show_task(struct task_struct *tsk) (tsk->exit_state & EXIT_DEAD) ? 'E' : (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; - printf("%p %016lx %6d %6d %c %2d %s\n", tsk, + printf("%px %016lx %6d %6d %c %2d %s\n", tsk, tsk->thread.ksp, tsk->pid, tsk->parent->pid, state, task_thread_info(tsk)->cpu, @@ -2988,7 +2988,7 @@ static void show_pte(unsigned long addr) if (setjmp(bus_error_jmp) != 0) { catch_memory_errors = 0; - printf("*** Error dumping pte for task %p\n", tsk); + printf("*** Error dumping pte for task %px\n", tsk); return; } @@ -3074,7 +3074,7 @@ static void show_tasks(void) if (setjmp(bus_error_jmp) != 0) { catch_memory_errors = 0; - printf("*** Error dumping task %p\n", tsk); + printf("*** Error dumping task %px\n", tsk); return; } diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..47dacf06c679f3eff22c0a6ab0f619d9b4019ab5 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -0,0 +1,75 @@ +CONFIG_SMP=y +CONFIG_PCI=y +CONFIG_PCIE_XILINX=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_CGROUP_BPF=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_EXPERT=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_BPF_SYSCALL=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_NETLINK_DIAG=y +CONFIG_DEVTMPFS=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_NETDEVICES=y +CONFIG_VIRTIO_NET=y +CONFIG_MACB=y +CONFIG_E1000E=y +CONFIG_R8169=y +CONFIG_MICROSEMI_PHY=y +CONFIG_INPUT_MOUSEDEV=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_PTP_1588_CLOCK is not set +CONFIG_DRM=y +CONFIG_DRM_RADEON=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_USB=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_XHCI_PLATFORM=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_STORAGE=y +CONFIG_USB_UAS=y +CONFIG_VIRTIO_MMIO=y +CONFIG_RAS=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_AUTOFS4_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_NFS_FS=y +CONFIG_NFS_V4=y +CONFIG_NFS_V4_1=y +CONFIG_NFS_V4_2=y +CONFIG_ROOT_NFS=y +# CONFIG_RCU_TRACE is not set +CONFIG_CRYPTO_USER_API_HASH=y diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 773c4e039cd7288bcd25ed53ce831db84c766f26..c0319cbf1eec58d7ea8960259838b865c23f49a1 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -38,6 +38,25 @@ #define smp_rmb() RISCV_FENCE(r,r) #define smp_wmb() RISCV_FENCE(w,w) +/* + * This is a very specific barrier: it's currently only used in two places in + * the kernel, both in the scheduler. See include/linux/spinlock.h for the two + * orderings it guarantees, but the "critical section is RCsc" guarantee + * mandates a barrier on RISC-V. The sequence looks like: + * + * lr.aq lock + * sc lock <= LOCKED + * smp_mb__after_spinlock() + * // critical section + * lr lock + * sc.rl lock <= UNLOCKED + * + * The AQ/RL pair provides a RCpc critical section, but there's not really any + * way we can take advantage of that here because the ordering is only enforced + * on that one lock. Thus, we're just doing a full fence. + */ +#define smp_mb__after_spinlock() RISCV_FENCE(rw,rw) + #include #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 0d64bc9f4f91562872b36231e5b593887498cde2..3c7a2c97e377a5af2923c8e7d96cf04e57e66892 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -17,10 +17,10 @@ #include /* Status register flags */ -#define SR_IE _AC(0x00000002, UL) /* Interrupt Enable */ -#define SR_PIE _AC(0x00000020, UL) /* Previous IE */ -#define SR_PS _AC(0x00000100, UL) /* Previously Supervisor */ -#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */ +#define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */ +#define SR_SPIE _AC(0x00000020, UL) /* Previous Supervisor IE */ +#define SR_SPP _AC(0x00000100, UL) /* Previously Supervisor */ +#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */ #define SR_FS _AC(0x00006000, UL) /* Floating-point Status */ #define SR_FS_OFF _AC(0x00000000, UL) diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index a82ce599b639813c9ed3ad697f217cb09a6538e1..b269451e7e85577c76cb718283806fdfb6f76532 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -21,8 +21,6 @@ #include -#ifdef CONFIG_MMU - extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); /* @@ -36,8 +34,6 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); extern void iounmap(volatile void __iomem *addr); -#endif /* CONFIG_MMU */ - /* Generic IO read/write. These perform native-endian accesses. */ #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 val, volatile void __iomem *addr) diff --git a/arch/riscv/include/asm/irqflags.h b/arch/riscv/include/asm/irqflags.h index 6fdc860d7f84fd69648af547ff8984946eac93fd..07a3c6d5706ff8fd8f34acbe6c5832fc6e638676 100644 --- a/arch/riscv/include/asm/irqflags.h +++ b/arch/riscv/include/asm/irqflags.h @@ -27,25 +27,25 @@ static inline unsigned long arch_local_save_flags(void) /* unconditionally enable interrupts */ static inline void arch_local_irq_enable(void) { - csr_set(sstatus, SR_IE); + csr_set(sstatus, SR_SIE); } /* unconditionally disable interrupts */ static inline void arch_local_irq_disable(void) { - csr_clear(sstatus, SR_IE); + csr_clear(sstatus, SR_SIE); } /* get status and disable interrupts */ static inline unsigned long arch_local_irq_save(void) { - return csr_read_clear(sstatus, SR_IE); + return csr_read_clear(sstatus, SR_SIE); } /* test flags */ static inline int arch_irqs_disabled_flags(unsigned long flags) { - return !(flags & SR_IE); + return !(flags & SR_SIE); } /* test hardware interrupt enable bit */ @@ -57,7 +57,7 @@ static inline int arch_irqs_disabled(void) /* set interrupt enabled status */ static inline void arch_local_irq_restore(unsigned long flags) { - csr_set(sstatus, flags & SR_IE); + csr_set(sstatus, flags & SR_SIE); } #endif /* _ASM_RISCV_IRQFLAGS_H */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 2cbd92ed1629c00df42b8ceaffd2250b6de7413a..16301966d65b6fd8a54614d12f0815866d19d948 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -20,8 +20,6 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_MMU - /* Page Upper Directory not used in RISC-V */ #include #include @@ -413,8 +411,6 @@ static inline void pgtable_cache_init(void) /* No page table caches to initialize */ } -#endif /* CONFIG_MMU */ - #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) #define VMALLOC_END (PAGE_OFFSET - 1) #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index 93b8956e25e46714a5bd789ed0ea15ebb2a687f2..2c5df945d43c9abfdfd197a61d8c92ce20e48133 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -66,7 +66,7 @@ struct pt_regs { #define REG_FMT "%08lx" #endif -#define user_mode(regs) (((regs)->sstatus & SR_PS) == 0) +#define user_mode(regs) (((regs)->sstatus & SR_SPP) == 0) /* Helpers for working with the instruction pointer */ diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 715b0f10af580811dfca3ba067819e07fe1e7374..7b9c24ebdf5293ac73b8155b2c394fc9244a712d 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -15,8 +15,6 @@ #ifndef _ASM_RISCV_TLBFLUSH_H #define _ASM_RISCV_TLBFLUSH_H -#ifdef CONFIG_MMU - #include /* @@ -64,6 +62,4 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } -#endif /* CONFIG_MMU */ - #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 27b90d64814b8d0422d0316f8e765b6e97081a47..14b0b22fb57875dfe920685265a79da317c517e0 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -127,7 +127,6 @@ extern int fixup_exception(struct pt_regs *state); * call. */ -#ifdef CONFIG_MMU #define __get_user_asm(insn, x, ptr, err) \ do { \ uintptr_t __tmp; \ @@ -153,13 +152,11 @@ do { \ __disable_user_access(); \ (x) = __x; \ } while (0) -#endif /* CONFIG_MMU */ #ifdef CONFIG_64BIT #define __get_user_8(x, ptr, err) \ __get_user_asm("ld", x, ptr, err) #else /* !CONFIG_64BIT */ -#ifdef CONFIG_MMU #define __get_user_8(x, ptr, err) \ do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ @@ -193,7 +190,6 @@ do { \ (x) = (__typeof__(x))((__typeof__((x)-(x)))( \ (((u64)__hi << 32) | __lo))); \ } while (0) -#endif /* CONFIG_MMU */ #endif /* CONFIG_64BIT */ @@ -267,8 +263,6 @@ do { \ ((x) = 0, -EFAULT); \ }) - -#ifdef CONFIG_MMU #define __put_user_asm(insn, x, ptr, err) \ do { \ uintptr_t __tmp; \ @@ -292,14 +286,11 @@ do { \ : "rJ" (__x), "i" (-EFAULT)); \ __disable_user_access(); \ } while (0) -#endif /* CONFIG_MMU */ - #ifdef CONFIG_64BIT #define __put_user_8(x, ptr, err) \ __put_user_asm("sd", x, ptr, err) #else /* !CONFIG_64BIT */ -#ifdef CONFIG_MMU #define __put_user_8(x, ptr, err) \ do { \ u32 __user *__ptr = (u32 __user *)(ptr); \ @@ -329,7 +320,6 @@ do { \ : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \ __disable_user_access(); \ } while (0) -#endif /* CONFIG_MMU */ #endif /* CONFIG_64BIT */ @@ -438,7 +428,6 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) * will set "err" to -EFAULT, while successful accesses return the previous * value. */ -#ifdef CONFIG_MMU #define __cmpxchg_user(ptr, old, new, err, size, lrb, scb) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ @@ -508,6 +497,5 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) (err) = __err; \ __ret; \ }) -#endif /* CONFIG_MMU */ #endif /* _ASM_RISCV_UACCESS_H */ diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h index 9f250ed007cd816e523898a10f32ec6814da6892..2f704a5c4196e30fd2a9a9c3607eeeed3d6f019f 100644 --- a/arch/riscv/include/asm/unistd.h +++ b/arch/riscv/include/asm/unistd.h @@ -14,3 +14,4 @@ #define __ARCH_HAVE_MMU #define __ARCH_WANT_SYS_CLONE #include +#include diff --git a/arch/riscv/include/asm/vdso-syscalls.h b/arch/riscv/include/asm/vdso-syscalls.h deleted file mode 100644 index a2ccf189492958e475cc7cc6664d86559124f001..0000000000000000000000000000000000000000 --- a/arch/riscv/include/asm/vdso-syscalls.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2017 SiFive - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#ifndef _ASM_RISCV_VDSO_SYSCALLS_H -#define _ASM_RISCV_VDSO_SYSCALLS_H - -#ifdef CONFIG_SMP - -/* These syscalls are only used by the vDSO and are not in the uapi. */ -#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) -__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) - -#endif - -#endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/include/uapi/asm/Kbuild b/arch/riscv/include/uapi/asm/Kbuild index 5ded96b063526e0073e3d79a41ba8b62e21e040c..7e91f485047576b559b3a8549ec7b5fc80827ac3 100644 --- a/arch/riscv/include/uapi/asm/Kbuild +++ b/arch/riscv/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += setup.h generic-y += unistd.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/riscv/include/uapi/asm/syscalls.h b/arch/riscv/include/uapi/asm/syscalls.h new file mode 100644 index 0000000000000000000000000000000000000000..818655b0d5356a1f5c0768419c6bd98ca5a58ec6 --- /dev/null +++ b/arch/riscv/include/uapi/asm/syscalls.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2017 SiFive + */ + +#ifndef _ASM__UAPI__SYSCALLS_H +#define _ASM__UAPI__SYSCALLS_H + +/* + * Allows the instruction cache to be flushed from userspace. Despite RISC-V + * having a direct 'fence.i' instruction available to userspace (which we + * can't trap!), that's not actually viable when running on Linux because the + * kernel might schedule a process on another hart. There is no way for + * userspace to handle this without invoking the kernel (as it doesn't know the + * thread->hart mappings), so we've defined a RISC-V specific system call to + * flush the instruction cache. + * + * __NR_riscv_flush_icache is defined to flush the instruction cache over an + * address range, with the flush applying to either all threads or just the + * caller. We don't currently do anything with the address range, that's just + * in there for forwards compatibility. + */ +#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) +__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) + +#endif diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 20ee86f782a93b19779236f39a40daa3d9563ac5..7404ec222406290ed03c671a3b4463c61aa49c12 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -196,7 +196,7 @@ handle_syscall: addi s2, s2, 0x4 REG_S s2, PT_SEPC(sp) /* System calls run with interrupts enabled */ - csrs sstatus, SR_IE + csrs sstatus, SR_SIE /* Trace syscalls, but only if requested by the user. */ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_TRACE @@ -224,8 +224,8 @@ ret_from_syscall: ret_from_exception: REG_L s0, PT_SSTATUS(sp) - csrc sstatus, SR_IE - andi s0, s0, SR_PS + csrc sstatus, SR_SIE + andi s0, s0, SR_SPP bnez s0, restore_all resume_userspace: @@ -255,7 +255,7 @@ work_pending: bnez s1, work_resched work_notifysig: /* Handle pending signals and notify-resume requests */ - csrs sstatus, SR_IE /* Enable interrupts for do_notify_resume() */ + csrs sstatus, SR_SIE /* Enable interrupts for do_notify_resume() */ move a0, sp /* pt_regs */ move a1, s0 /* current_thread_info->flags */ tail do_notify_resume diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 0d90dcc1fbd36559823e48db2dc15ac4319b063d..d74d4adf2d54f94a4dff39c14d7953b8c0fdbab8 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -76,7 +76,7 @@ void show_regs(struct pt_regs *regs) void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { - regs->sstatus = SR_PIE /* User mode, irqs on */ | SR_FS_INITIAL; + regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL; regs->sepc = pc; regs->sp = sp; set_fs(USER_DS); @@ -110,7 +110,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, const register unsigned long gp __asm__ ("gp"); memset(childregs, 0, sizeof(struct pt_regs)); childregs->gp = gp; - childregs->sstatus = SR_PS | SR_PIE; /* Supervisor, irqs on */ + childregs->sstatus = SR_SPP | SR_SPIE; /* Supervisor, irqs on */ p->thread.ra = (unsigned long)ret_from_kernel_thread; p->thread.s[0] = usp; /* fn */ diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 8fbb6749910d42473d814b37eb255f812d4d206b..cb7b0c63014ecbc61c8d9a2b8263a65dd1775d11 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -38,10 +38,6 @@ #include #include -#ifdef CONFIG_HVC_RISCV_SBI -#include -#endif - #ifdef CONFIG_DUMMY_CONSOLE struct screen_info screen_info = { .orig_video_lines = 30, @@ -212,13 +208,6 @@ static void __init setup_bootmem(void) void __init setup_arch(char **cmdline_p) { -#if defined(CONFIG_HVC_RISCV_SBI) - if (likely(early_console == NULL)) { - early_console = &riscv_sbi_early_console_dev; - register_console(early_console); - } -#endif - #ifdef CONFIG_CMDLINE_BOOL #ifdef CONFIG_CMDLINE_OVERRIDE strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index a2ae936a093e4f91d4def96fff0dd46e6e0226c0..79c78668258ede202086c072c63352ca14585903 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -70,7 +70,7 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0; /* Check the reserved flags. */ - if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL)) + if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL)) return -EINVAL; flush_icache_mm(mm, local); diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index a5bd6401f95e6988a376406f02b7e39cb7c1352e..ade52b903a43f2b27546fdd216b87b7057bc95d4 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -23,5 +23,4 @@ void *sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = sys_ni_syscall, #include -#include }; diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S index b0fbad74e873ea9fff553424b975502b589d7097..023e4d4aef588e139fafcbc9e3fb1ddef0482575 100644 --- a/arch/riscv/kernel/vdso/flush_icache.S +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -13,7 +13,6 @@ #include #include -#include .text /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index df2ca3c65048f9347781b05309c9552eaa7d0844..0713f3c67ab4242a2e54430331044724bf57f289 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -63,7 +63,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs) goto vmalloc_fault; /* Enable interrupts if they were enabled in the parent context. */ - if (likely(regs->sstatus & SR_PIE)) + if (likely(regs->sstatus & SR_SPIE)) local_irq_enable(); /* diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index eae2c64cf69d1cc8d6d4d920d8ae4ec18b520bb4..9fdff3fe1a42aac7414c06ccea3018399906fd42 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-y += kernel/ obj-y += mm/ obj-$(CONFIG_KVM) += kvm/ diff --git a/arch/s390/appldata/Makefile b/arch/s390/appldata/Makefile index 99f1cf071304bd7686d1c26c5a7416207c802b13..b06def4a4f2f9955d9bb31c7950eeca0fce92af1 100644 --- a/arch/s390/appldata/Makefile +++ b/arch/s390/appldata/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux - z/VM Monitor Stream. # diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr index f02382ae5c48b1cd319a4cb1e755fa060f03cfdc..42a242597f346345c50d9be5422e74db79ad8496 100644 --- a/arch/s390/boot/compressed/vmlinux.scr +++ b/arch/s390/boot/compressed/vmlinux.scr @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ SECTIONS { .rodata.compressed : { diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index c7de53d8da7553d58c797d7d43de6b23101aadf6..a00c17f761c190269058d2052cc1cf40758bb0c8 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * @@ -16,12 +17,6 @@ * Copyright (c) Alan Smithee. * Copyright (c) Andrew McDonald * Copyright (c) Jean-Francois Dive - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include #include diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile index 2ee25ba252d68ab98efdcd7a525ae0b29778950d..06f601509ce983bf250ac58cf2e15c902f75b7b8 100644 --- a/arch/s390/hypfs/Makefile +++ b/arch/s390/hypfs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the linux hypfs filesystem routines. # diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 41c211a4d8b17023ff2fde819d8c103527c09878..0484508693287feebdaf10ffb289161e1c9f2f50 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 generic-y += asm-offsets.h generic-y += cacheflush.h generic-y += clkdev.h diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index a72002056b54848103fc626338fee956c145bfc9..c2cf7bcdef9b7491a6b0c3dbc0998f0686625a51 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_S390_ALTERNATIVE_H #define _ASM_S390_ALTERNATIVE_H diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index c02f4aba88a6220bfc1bc7714632d52676aa5bef..cfce6835b109fd88e74d24af3022afdc01ddc5d3 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Adjunct processor (AP) interfaces * * Copyright IBM Corp. 2017 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Tony Krowiak * Martin Schwidefsky * Harald Freudenberger diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h index 0f5bd894f4dcfcbac666e3c5521c52c960d60623..aa42a179be33a5f92592c4099f38dc951e7a7abf 100644 --- a/arch/s390/include/asm/bugs.h +++ b/arch/s390/include/asm/bugs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * S390 version * Copyright IBM Corp. 1999 diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index d6c9d1e0dc2d4bc0fe36a46109211f93682e5cfd..b9c0e361748bb46eb5dddb60f79e5de27eef51b1 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -40,6 +40,7 @@ struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +#define perf_arch_bpf_user_pt_regs(regs) ®s->user_regs /* Perf pt_regs extension for sample-data-entry indicators */ struct perf_sf_sde_regs { diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 57d7bc92e0b8a766d24520ea5234fca56971b646..0a6b0286c32e9e0a7283d9cfb66dc357fe2e36fa 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1264,12 +1264,6 @@ static inline pud_t pud_mkwrite(pud_t pud) return pud; } -#define pud_write pud_write -static inline int pud_write(pud_t pud) -{ - return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0; -} - static inline pud_t pud_mkclean(pud_t pud) { if (pud_large(pud)) { diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index a3788dafc0e1f2272abd0ba9c455b775e81f627a..6f70d81c40f239fde907795707c2e8ebc2bb9d47 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -74,9 +74,14 @@ enum { */ struct pt_regs { - unsigned long args[1]; - psw_t psw; - unsigned long gprs[NUM_GPRS]; + union { + user_pt_regs user_regs; + struct { + unsigned long args[1]; + psw_t psw; + unsigned long gprs[NUM_GPRS]; + }; + }; unsigned long orig_gpr2; unsigned int int_code; unsigned int int_parm; diff --git a/arch/s390/include/asm/segment.h b/arch/s390/include/asm/segment.h index 8bfce3475b1c46715d6b82f1d911e82054269b86..97a0582b8d0f175942034895789c8988f24934a5 100644 --- a/arch/s390/include/asm/segment.h +++ b/arch/s390/include/asm/segment.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_SEGMENT_H #define _ASM_SEGMENT_H diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index ec7b476c1ac571b82e219a786ad2525a0ff9ceb2..c61b2cc1a8a86a9508fe093a607d106ff0118be3 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -30,21 +30,20 @@ static inline void restore_access_regs(unsigned int *acrs) asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs)); } -#define switch_to(prev,next,last) do { \ - if (prev->mm) { \ - save_fpu_regs(); \ - save_access_regs(&prev->thread.acrs[0]); \ - save_ri_cb(prev->thread.ri_cb); \ - save_gs_cb(prev->thread.gs_cb); \ - } \ +#define switch_to(prev, next, last) do { \ + /* save_fpu_regs() sets the CIF_FPU flag, which enforces \ + * a restore of the floating point / vector registers as \ + * soon as the next task returns to user space \ + */ \ + save_fpu_regs(); \ + save_access_regs(&prev->thread.acrs[0]); \ + save_ri_cb(prev->thread.ri_cb); \ + save_gs_cb(prev->thread.gs_cb); \ update_cr_regs(next); \ - if (next->mm) { \ - set_cpu_flag(CIF_FPU); \ - restore_access_regs(&next->thread.acrs[0]); \ - restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ - restore_gs_cb(next->thread.gs_cb); \ - } \ - prev = __switch_to(prev,next); \ + restore_access_regs(&next->thread.acrs[0]); \ + restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ + restore_gs_cb(next->thread.gs_cb); \ + prev = __switch_to(prev, next); \ } while (0) #endif /* __ASM_SWITCH_TO_H */ diff --git a/arch/s390/include/asm/vga.h b/arch/s390/include/asm/vga.h index d375526c261f19a99053e7e3e06029ecde7c1983..605dc46bac5e0d182268e33246983e12b48bc41a 100644 --- a/arch/s390/include/asm/vga.h +++ b/arch/s390/include/asm/vga.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_S390_VGA_H #define _ASM_S390_VGA_H diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 098f28778a13408e220e7d3b0a8a657e7e508c69..92b7c9b3e6417b50515f2cf4cdc3b4e3f3b7042e 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm diff --git a/arch/s390/include/uapi/asm/bpf_perf_event.h b/arch/s390/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 0000000000000000000000000000000000000000..cefe7c7cd4f6f29fabd90e33495d7a6d8ac6dbdd --- /dev/null +++ b/arch/s390/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include + +typedef user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/s390/include/uapi/asm/perf_regs.h b/arch/s390/include/uapi/asm/perf_regs.h index 7c8564f98205a4b65163865ee1743c0c09bc3a92..d17dd9e5d51638f08ee44ad3e94d840ea0fc568d 100644 --- a/arch/s390/include/uapi/asm/perf_regs.h +++ b/arch/s390/include/uapi/asm/perf_regs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _ASM_S390_PERF_REGS_H #define _ASM_S390_PERF_REGS_H diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index 0d23c8ff290085b43745fdc23cd18a2911624aca..543dd70e12c81d59a9987c437f1895a216c2525b 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -162,7 +162,7 @@ #define GPR_SIZE 8 #define CR_SIZE 8 -#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ +#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */ #endif /* __s390x__ */ @@ -179,17 +179,16 @@ #define ACR_SIZE 4 -#define PTRACE_OLDSETOPTIONS 21 +#define PTRACE_OLDSETOPTIONS 21 #ifndef __ASSEMBLY__ #include #include -typedef union -{ - float f; - double d; - __u64 ui; +typedef union { + float f; + double d; + __u64 ui; struct { __u32 hi; @@ -197,23 +196,21 @@ typedef union } fp; } freg_t; -typedef struct -{ - __u32 fpc; +typedef struct { + __u32 fpc; __u32 pad; - freg_t fprs[NUM_FPRS]; + freg_t fprs[NUM_FPRS]; } s390_fp_regs; -#define FPC_EXCEPTION_MASK 0xF8000000 -#define FPC_FLAGS_MASK 0x00F80000 -#define FPC_DXC_MASK 0x0000FF00 -#define FPC_RM_MASK 0x00000003 +#define FPC_EXCEPTION_MASK 0xF8000000 +#define FPC_FLAGS_MASK 0x00F80000 +#define FPC_DXC_MASK 0x0000FF00 +#define FPC_RM_MASK 0x00000003 /* this typedef defines how a Program Status Word looks like */ -typedef struct -{ - unsigned long mask; - unsigned long addr; +typedef struct { + unsigned long mask; + unsigned long addr; } __attribute__ ((aligned(8))) psw_t; #ifndef __s390x__ @@ -282,33 +279,40 @@ typedef struct /* * The s390_regs structure is used to define the elf_gregset_t. */ -typedef struct -{ +typedef struct { psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned int acrs[NUM_ACRS]; unsigned long orig_gpr2; } s390_regs; +/* + * The user_pt_regs structure exports the beginning of + * the in-kernel pt_regs structure to user space. + */ +typedef struct { + unsigned long args[1]; + psw_t psw; + unsigned long gprs[NUM_GPRS]; +} user_pt_regs; + /* * Now for the user space program event recording (trace) definitions. * The following structures are used only for the ptrace interface, don't * touch or even look at it if you don't want to modify the user-space * ptrace interface. In particular stay away from it for in-kernel PER. */ -typedef struct -{ +typedef struct { unsigned long cr[NUM_CR_WORDS]; } per_cr_words; #define PER_EM_MASK 0xE8000000UL -typedef struct -{ +typedef struct { #ifdef __s390x__ - unsigned : 32; + unsigned : 32; #endif /* __s390x__ */ - unsigned em_branching : 1; + unsigned em_branching : 1; unsigned em_instruction_fetch : 1; /* * Switching on storage alteration automatically fixes @@ -317,44 +321,41 @@ typedef struct unsigned em_storage_alteration : 1; unsigned em_gpr_alt_unused : 1; unsigned em_store_real_address : 1; - unsigned : 3; + unsigned : 3; unsigned branch_addr_ctl : 1; - unsigned : 1; + unsigned : 1; unsigned storage_alt_space_ctl : 1; - unsigned : 21; + unsigned : 21; unsigned long starting_addr; unsigned long ending_addr; } per_cr_bits; -typedef struct -{ +typedef struct { unsigned short perc_atmid; unsigned long address; unsigned char access_id; } per_lowcore_words; -typedef struct -{ - unsigned perc_branching : 1; +typedef struct { + unsigned perc_branching : 1; unsigned perc_instruction_fetch : 1; unsigned perc_storage_alteration : 1; - unsigned perc_gpr_alt_unused : 1; + unsigned perc_gpr_alt_unused : 1; unsigned perc_store_real_address : 1; - unsigned : 3; - unsigned atmid_psw_bit_31 : 1; - unsigned atmid_validity_bit : 1; - unsigned atmid_psw_bit_32 : 1; - unsigned atmid_psw_bit_5 : 1; - unsigned atmid_psw_bit_16 : 1; - unsigned atmid_psw_bit_17 : 1; - unsigned si : 2; + unsigned : 3; + unsigned atmid_psw_bit_31 : 1; + unsigned atmid_validity_bit : 1; + unsigned atmid_psw_bit_32 : 1; + unsigned atmid_psw_bit_5 : 1; + unsigned atmid_psw_bit_16 : 1; + unsigned atmid_psw_bit_17 : 1; + unsigned si : 2; unsigned long address; - unsigned : 4; - unsigned access_id : 4; + unsigned : 4; + unsigned access_id : 4; } per_lowcore_bits; -typedef struct -{ +typedef struct { union { per_cr_words words; per_cr_bits bits; @@ -364,9 +365,9 @@ typedef struct * the kernel always sets them to zero. To enable single * stepping use ptrace(PTRACE_SINGLESTEP) instead. */ - unsigned single_step : 1; + unsigned single_step : 1; unsigned instruction_fetch : 1; - unsigned : 30; + unsigned : 30; /* * These addresses are copied into cr10 & cr11 if single * stepping is switched off @@ -376,11 +377,10 @@ typedef struct union { per_lowcore_words words; per_lowcore_bits bits; - } lowcore; + } lowcore; } per_struct; -typedef struct -{ +typedef struct { unsigned int len; unsigned long kernel_addr; unsigned long process_addr; @@ -390,12 +390,12 @@ typedef struct * S/390 specific non posix ptrace requests. I chose unusual values so * they are unlikely to clash with future ptrace definitions. */ -#define PTRACE_PEEKUSR_AREA 0x5000 -#define PTRACE_POKEUSR_AREA 0x5001 +#define PTRACE_PEEKUSR_AREA 0x5000 +#define PTRACE_POKEUSR_AREA 0x5001 #define PTRACE_PEEKTEXT_AREA 0x5002 #define PTRACE_PEEKDATA_AREA 0x5003 #define PTRACE_POKETEXT_AREA 0x5004 -#define PTRACE_POKEDATA_AREA 0x5005 +#define PTRACE_POKEDATA_AREA 0x5005 #define PTRACE_GET_LAST_BREAK 0x5006 #define PTRACE_PEEK_SYSTEM_CALL 0x5007 #define PTRACE_POKE_SYSTEM_CALL 0x5008 @@ -413,21 +413,19 @@ typedef struct * PT_PROT definition is loosely based on hppa bsd definition in * gdb/hppab-nat.c */ -#define PTRACE_PROT 21 +#define PTRACE_PROT 21 -typedef enum -{ +typedef enum { ptprot_set_access_watchpoint, ptprot_set_write_watchpoint, ptprot_disable_watchpoint } ptprot_flags; -typedef struct -{ +typedef struct { unsigned long lowaddr; unsigned long hiaddr; ptprot_flags prot; -} ptprot_area; +} ptprot_area; /* Sequence of bytes for breakpoint illegal instruction. */ #define S390_BREAKPOINT {0x0,0x1} @@ -439,8 +437,7 @@ typedef struct * The user_regs_struct defines the way the user registers are * store on the stack for signal handling. */ -struct user_regs_struct -{ +struct user_regs_struct { psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned int acrs[NUM_ACRS]; diff --git a/arch/s390/include/uapi/asm/sthyi.h b/arch/s390/include/uapi/asm/sthyi.h index ec113db4eb7e2658909f80f35bc5679cf53b3189..b1b0223169839da11aeb00884d7dc901e4ab5329 100644 --- a/arch/s390/include/uapi/asm/sthyi.h +++ b/arch/s390/include/uapi/asm/sthyi.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_ASM_STHYI_H #define _UAPI_ASM_STHYI_H diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h index 3a77833c74dc20e065e0bc3d77ce0bda9f955fa5..2b605f7e8483675cde7cdf2e553e068afde0f950 100644 --- a/arch/s390/include/uapi/asm/virtio-ccw.h +++ b/arch/s390/include/uapi/asm/virtio-ccw.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* * Definitions for virtio-ccw devices. * diff --git a/arch/s390/include/uapi/asm/vmcp.h b/arch/s390/include/uapi/asm/vmcp.h index 4caf71714a552332a169cb6799288442984ca088..aeaaa030030e6c9869e0d93b51232176b4845ef2 100644 --- a/arch/s390/include/uapi/asm/vmcp.h +++ b/arch/s390/include/uapi/asm/vmcp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * Copyright IBM Corp. 2004, 2005 * Interface implementation for communication with the z/VM control program diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 315986a06cf57f2c768b4c1a549a794af7952b46..574e77622c049c68e557d1a7413e6c161b457fb8 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include #include #include diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index f04db3779b34507f9dd38791fc89131505d5f0c3..59eea9c65d3e9e8595d509001b1c794420060887 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis return retval; } + groups_sort(group_info); retval = set_current_groups(group_info); put_group_info(group_info); diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index f8603ebed669b6501de788e5c2c6cbb902023f88..54e2d634b849e128c1ec1d1ccc15c600978ed50b 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include #include #include diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 308a7b63348b3f1950c979dd947d22466f89059f..f7fc633855534cbc0ab53c0d00f26aed59c1a1ad 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -370,10 +370,10 @@ SYSCALL(sys_recvmmsg,compat_sys_recvmmsg) SYSCALL(sys_sendmmsg,compat_sys_sendmmsg) SYSCALL(sys_socket,sys_socket) SYSCALL(sys_socketpair,compat_sys_socketpair) /* 360 */ -SYSCALL(sys_bind,sys_bind) -SYSCALL(sys_connect,sys_connect) +SYSCALL(sys_bind,compat_sys_bind) +SYSCALL(sys_connect,compat_sys_connect) SYSCALL(sys_listen,sys_listen) -SYSCALL(sys_accept4,sys_accept4) +SYSCALL(sys_accept4,compat_sys_accept4) SYSCALL(sys_getsockopt,compat_sys_getsockopt) /* 365 */ SYSCALL(sys_setsockopt,compat_sys_setsockopt) SYSCALL(sys_getsockname,compat_sys_getsockname) diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S index 79a071e4357e4bc51f8813427d0f0ffa4c5ba56c..db19d0680a0afdf34b85eddf418a7e822dcad0a7 100644 --- a/arch/s390/kernel/vdso64/note.S +++ b/arch/s390/kernel/vdso64/note.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. * Here we can supply some information useful to userland. diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 6048b1c6e58062bac1258b003a4ebc3814896712..05ee90a5ea08bdb50eb3b9f2b77e70349f7e40bf 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -1,10 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 # Makefile for kernel virtual machines on s390 # # Copyright IBM Corp. 2008 -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License (version 2 only) -# as published by the Free Software Foundation. KVM := ../../../virt/kvm common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index d93a2c0474bf67e45698882ecb5f36f0d8688a7d..89aa114a2cbada0989cec25757ec93daed36d064 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * handling diagnose instructions * * Copyright IBM Corp. 2008, 2011 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger */ diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index bec42b852246f0c44f88acf20ae36c51a36c3d20..f4c51756c46239cb4246ae7bd2e0aeb166d46383 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * access guest memory * * Copyright IBM Corp. 2008, 2014 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte */ diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index bcbd86621d018085d036f43d815ae1c97791951c..b5f3e82006d0b2e2d8806e8a7e5e58e559255e5b 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * kvm guest debug support * * Copyright IBM Corp. 2014 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): David Hildenbrand */ #include diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 8fe034beb623217f42bc990c58dea9984426ef30..9c7d707158622e7f0743570db60599fa95a9de3b 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * in-kernel handling for sie intercepts * * Copyright IBM Corp. 2008, 2014 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index fa557372d600a0283663635ff198895cfad91709..024ad8bcc51655e98ffed300817bc0e06e051cf7 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * handling kvm guest interrupts * * Copyright IBM Corp. 2008, 2015 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte */ diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h index d98e4159643df457a4dfeb50e93ab77d53291698..484608c71dd01185b88f9db1a11e3f523bcefebf 100644 --- a/arch/s390/kvm/irq.h +++ b/arch/s390/kvm/irq.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * s390 irqchip routines * * Copyright IBM Corp. 2014 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Cornelia Huck */ #ifndef __KVM_IRQ_H diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9614aea5839b6ecf1c36e2ccbbd64e2592621dd9..2c93cbbcd15e35a389078643874bf424160f25db 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * hosting zSeries kernel virtual machines + * hosting IBM Z kernel virtual machines (s390x) * - * Copyright IBM Corp. 2008, 2009 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. + * Copyright IBM Corp. 2008, 2017 * * Author(s): Carsten Otte * Christian Borntraeger @@ -795,11 +792,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm) if (kvm->arch.use_cmma) { /* - * Get the last slot. They should be sorted by base_gfn, so the - * last slot is also the one at the end of the address space. - * We have verified above that at least one slot is present. + * Get the first slot. They are reverse sorted by base_gfn, so + * the first slot is also the one at the end of the address + * space. We have verified above that at least one slot is + * present. */ - ms = slots->memslots + slots->used_slots - 1; + ms = slots->memslots; /* round up so we only use full longs */ ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG); /* allocate enough bytes to store all the bits */ @@ -3808,6 +3806,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; break; } + /* do not use irq_state.flags, it will break old QEMUs */ r = kvm_s390_set_irq_state(vcpu, (void __user *) irq_state.buf, irq_state.len); @@ -3823,6 +3822,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; break; } + /* do not use irq_state.flags, it will break old QEMUs */ r = kvm_s390_get_irq_state(vcpu, (__u8 __user *) irq_state.buf, irq_state.len); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 10d65dfbc306736bc31e1a7f363c2b48516c52e7..5e46ba429bcb4dfe4345f531339557b6af71ef40 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for kvm on s390 * * Copyright IBM Corp. 2008, 2009 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger * Christian Ehrhardt diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c954ac49eee47158ac27bd1d16ba9dbcab7e25a3..0714bfa56da0f54cae66b26bd5329b18beb77b0b 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * handling privileged instructions * * Copyright IBM Corp. 2008, 2013 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger */ @@ -235,8 +232,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); return -EAGAIN; } - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) - return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); return 0; } @@ -247,6 +242,9 @@ static int handle_iske(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + rc = try_handle_skey(vcpu); if (rc) return rc != -EAGAIN ? rc : 0; @@ -276,6 +274,9 @@ static int handle_rrbe(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + rc = try_handle_skey(vcpu); if (rc) return rc != -EAGAIN ? rc : 0; @@ -311,6 +312,9 @@ static int handle_sske(struct kvm_vcpu *vcpu) int reg1, reg2; int rc; + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + rc = try_handle_skey(vcpu); if (rc) return rc != -EAGAIN ? rc : 0; @@ -1002,7 +1006,7 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) cbrlo[entries] = gfn << PAGE_SHIFT; } - if (orc) { + if (orc && gfn < ms->bitmap_size) { /* increment only if we are really flipping the bit to 1 */ if (!test_and_set_bit(gfn, ms->pgste_bitmap)) atomic64_inc(&ms->dirty_pages); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 9d592ef4104b0416029944fed5770a8ea74a47cb..c1f5cde2c878e63e32d44a47a10e3c77ccfc32ae 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * handling interprocessor communication * * Copyright IBM Corp. 2008, 2013 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte * Christian Borntraeger * Christian Ehrhardt diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index a311938b63b3b4eea0dd57e80bd02c4708dfcd92..5d6ae0326d9e8fa2707e8d066488c8313dfef879 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * kvm nested virtualization support for s390x * * Copyright IBM Corp. 2016 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): David Hildenbrand */ #include diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index cae5a1e16cbd2d9ac5cc7b2fd1f67443919b8f80..c4f8039a35e8dda0bc20999b7db089e3ab09b613 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -89,11 +89,11 @@ EXPORT_SYMBOL(enable_sacf_uaccess); void disable_sacf_uaccess(mm_segment_t old_fs) { + current->thread.mm_segment = old_fs; if (old_fs == USER_DS && test_facility(27)) { __ctl_load(S390_lowcore.user_asce, 1, 1); clear_cpu_flag(CIF_ASCE_PRIMARY); } - current->thread.mm_segment = old_fs; } EXPORT_SYMBOL(disable_sacf_uaccess); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 434a9564917beceadeffd0f34d041683b717af1c..cb364153c43ce204b8736fe1758e7bac8fda2a69 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -83,8 +83,6 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end) /* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */ VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE); - if (end >= TASK_SIZE_MAX) - return -ENOMEM; rc = 0; notify = 0; while (mm->context.asce_limit < end) { diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile index 90568c33ddb0ef72aaed02bcf4bcc15da9afef37..e0d5f245e42bc713443d5c6d09d9034850adbec6 100644 --- a/arch/s390/net/Makefile +++ b/arch/s390/net/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Arch-specific network modules # diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index e81c16838b90f1bc9a5418bc1b4e5365e9cb0aef..9557d8b516df5a689dda995cd7fd501ddd6cf54c 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -55,8 +55,7 @@ struct bpf_jit { #define SEEN_LITERAL 8 /* code uses literals */ #define SEEN_FUNC 16 /* calls C functions */ #define SEEN_TAIL_CALL 32 /* code uses tail calls */ -#define SEEN_SKB_CHANGE 64 /* code changes skb data */ -#define SEEN_REG_AX 128 /* code uses constant blinding */ +#define SEEN_REG_AX 64 /* code uses constant blinding */ #define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) /* @@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, 152); } - if (jit->seen & SEEN_SKB) + if (jit->seen & SEEN_SKB) { emit_load_skb_data_hlen(jit); - if (jit->seen & SEEN_SKB_CHANGE) /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, STK_OFF_SKBP); + } } /* @@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i EMIT2(0x0d00, REG_14, REG_W1); /* lgr %b0,%r2: load return value into %b0 */ EMIT4(0xb9040000, BPF_REG_0, REG_2); - if (bpf_helper_changes_pkt_data((void *)func)) { - jit->seen |= SEEN_SKB_CHANGE; + if ((jit->seen & SEEN_SKB) && + bpf_helper_changes_pkt_data((void *)func)) { /* lg %b1,ST_OFF_SKBP(%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, REG_15, STK_OFF_SKBP); diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile index f94ecaffa71bb543f0d90c9340d7bee23739fe85..66c2dff74895f8ef5924e3a19606e18b7029a882 100644 --- a/arch/s390/numa/Makefile +++ b/arch/s390/numa/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-y += numa.o obj-y += toptree.o obj-$(CONFIG_NUMA_EMU) += mode_emu.o diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 805d8b29193a5964b2d3d6edb617f94a9b37e905..22d0871291eef12438398207cb06e964b14ad73d 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the s390 PCI subsystem. # diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index f7aa5a77827ec17d893d59834a0d33082bb8fd82..2d15d84c20ede64297e0c906a55e3de166f53e3d 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -181,6 +181,9 @@ static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa, static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, size_t size, int flags) { + unsigned long irqflags; + int ret; + /* * With zdev->tlb_refresh == 0, rpcit is not required to establish new * translations when previously invalid translation-table entries are @@ -196,8 +199,22 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, return 0; } - return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, - PAGE_ALIGN(size)); + ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, + PAGE_ALIGN(size)); + if (ret == -ENOMEM && !s390_iommu_strict) { + /* enable the hypervisor to free some resources */ + if (zpci_refresh_global(zdev)) + goto out; + + spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags); + bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, + zdev->lazy_bitmap, zdev->iommu_pages); + bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); + spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags); + ret = 0; + } +out: + return ret; } static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 19bcb3b45a70fc12fa426d636fd4482c570c6654..f069929e82114004adea2cc0bfc3abc15bdf23da 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -89,6 +89,9 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range) if (cc) zpci_err_insn(cc, status, addr, range); + if (cc == 1 && (status == 4 || status == 16)) + return -ENOMEM; + return (cc) ? -EIO : 0; } diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c index 01d4c5a4bfe9781fdba3ccfd4e727e518892815f..357d42681cefae0e8c7e40d500bdb5b452d9173e 100644 --- a/arch/s390/tools/gen_opcode_table.c +++ b/arch/s390/tools/gen_opcode_table.c @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Generate opcode table initializers for the in-kernel disassembler. * diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild index c94ee54210bc489efd469493800596cd2b7061a3..81271d3af47cb1000ebfab2539efa92080a1eccc 100644 --- a/arch/score/include/uapi/asm/Kbuild +++ b/arch/score/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += siginfo.h diff --git a/arch/sh/boards/mach-se/770x/setup.c b/arch/sh/boards/mach-se/770x/setup.c index 77c35350ee774d4fa26ed5bc57b70d9d03c7bcab..412326d59e6fcebd51469163f6476b88753aaa4c 100644 --- a/arch/sh/boards/mach-se/770x/setup.c +++ b/arch/sh/boards/mach-se/770x/setup.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include #include @@ -115,13 +116,23 @@ static struct platform_device heartbeat_device = { #if defined(CONFIG_CPU_SUBTYPE_SH7710) ||\ defined(CONFIG_CPU_SUBTYPE_SH7712) /* SH771X Ethernet driver */ +static struct sh_eth_plat_data sh_eth_plat = { + .phy = PHY_ID, + .phy_interface = PHY_INTERFACE_MODE_MII, +}; + static struct resource sh_eth0_resources[] = { [0] = { .start = SH_ETH0_BASE, - .end = SH_ETH0_BASE + 0x1B8, + .end = SH_ETH0_BASE + 0x1B8 - 1, .flags = IORESOURCE_MEM, }, [1] = { + .start = SH_TSU_BASE, + .end = SH_TSU_BASE + 0x200 - 1, + .flags = IORESOURCE_MEM, + }, + [2] = { .start = SH_ETH0_IRQ, .end = SH_ETH0_IRQ, .flags = IORESOURCE_IRQ, @@ -132,7 +143,7 @@ static struct platform_device sh_eth0_device = { .name = "sh771x-ether", .id = 0, .dev = { - .platform_data = PHY_ID, + .platform_data = &sh_eth_plat, }, .num_resources = ARRAY_SIZE(sh_eth0_resources), .resource = sh_eth0_resources, @@ -141,10 +152,15 @@ static struct platform_device sh_eth0_device = { static struct resource sh_eth1_resources[] = { [0] = { .start = SH_ETH1_BASE, - .end = SH_ETH1_BASE + 0x1B8, + .end = SH_ETH1_BASE + 0x1B8 - 1, .flags = IORESOURCE_MEM, }, [1] = { + .start = SH_TSU_BASE, + .end = SH_TSU_BASE + 0x200 - 1, + .flags = IORESOURCE_MEM, + }, + [2] = { .start = SH_ETH1_IRQ, .end = SH_ETH1_IRQ, .flags = IORESOURCE_IRQ, @@ -155,7 +171,7 @@ static struct platform_device sh_eth1_device = { .name = "sh771x-ether", .id = 1, .dev = { - .platform_data = PHY_ID, + .platform_data = &sh_eth_plat, }, .num_resources = ARRAY_SIZE(sh_eth1_resources), .resource = sh_eth1_resources, diff --git a/arch/sh/include/mach-se/mach/se.h b/arch/sh/include/mach-se/mach/se.h index 4246ef9b07a346ed590be0cd3651f5a370ee700a..aa83fe1ff0b124c002e375e2ce5a83c7c853e29c 100644 --- a/arch/sh/include/mach-se/mach/se.h +++ b/arch/sh/include/mach-se/mach/se.h @@ -100,6 +100,7 @@ /* Base address */ #define SH_ETH0_BASE 0xA7000000 #define SH_ETH1_BASE 0xA7000400 +#define SH_TSU_BASE 0xA7000800 /* PHY ID */ #if defined(CONFIG_CPU_SUBTYPE_SH7710) # define PHY_ID 0x00 diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild index e28531333efa96d7195e1e9771d574c83caa040d..ba4d39cb321d0608d96f0b0c5ea45ddadea7cc45 100644 --- a/arch/sh/include/uapi/asm/Kbuild +++ b/arch/sh/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild index 2178c78c7c1a6336d4a11c9619de521519245c0e..4680ba246b554708aec94287f7974adcca4c8c97 100644 --- a/arch/sparc/include/uapi/asm/Kbuild +++ b/arch/sparc/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += types.h diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S index e5547b22cd1832c3aea507b3dfc694da90568222..0ddbbb03182232fe199f5222248617c72a2f23b7 100644 --- a/arch/sparc/lib/hweight.S +++ b/arch/sparc/lib/hweight.S @@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32) .previous ENTRY(__arch_hweight64) - sethi %hi(__sw_hweight16), %g1 - jmpl %g1 + %lo(__sw_hweight16), %g0 + sethi %hi(__sw_hweight64), %g1 + jmpl %g1 + %lo(__sw_hweight64), %g0 nop ENDPROC(__arch_hweight64) EXPORT_SYMBOL(__arch_hweight64) diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index be3136f142a9993e0c6c8cfa1d651b1685654a73..a8103a84b4ac4a2ec84c44c302862b3aed8b7e7f 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code, if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", + printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, (void *)regs->pc, (void *)regs->u_regs[UREG_I7], diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 815c03d7a765524424b92866b1567ea2a43695d4..41363f46797bf9f74dd922fadbd2a3f190e8c9bb 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code, if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", + printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, (void *)regs->tpc, (void *)regs->u_regs[UREG_I7], diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 33c0f8bb0f33de0c6beadd3dd8a9bef6253bcb10..5335ba3c850ed3acdc074ffe639d3ddac101f2ad 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, if (!(pmd_val(pmd) & _PAGE_VALID)) return 0; - if (!pmd_access_permitted(pmd, write)) + if (write && !pmd_write(pmd)) return 0; refs = 0; @@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, if (!(pud_val(pud) & _PAGE_VALID)) return 0; - if (!pud_access_permitted(pud, write)) + if (write && !pud_write(pud)) return 0; refs = 0; diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 5765e7e711f78248d2bff70f9c57ca48a4514355..ff5f9cb3039af1f91c8701915f08c051c21d0d81 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) u8 *func = ((u8 *)__bpf_call_base) + imm; ctx->saw_call = true; + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) + emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx); emit_call((u32 *)func, ctx); emit_nop(ctx); emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); - if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind) - load_skb_regs(ctx, bpf2sparc[BPF_REG_6]); + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) + load_skb_regs(ctx, L7); break; } diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild index 5711de0a1b5efc92519e152462a0ef1516612add..cc439612bcd52fee78256802f5aac1ded0c37ec8 100644 --- a/arch/tile/include/uapi/asm/Kbuild +++ b/arch/tile/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 50a32c33d729ba2a570eadaf77cff69925218c42..73c57f614c9e0600a5b4df7b28a7fa55f4abe471 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,4 +1,5 @@ generic-y += barrier.h +generic-y += bpf_perf_event.h generic-y += bug.h generic-y += clkdev.h generic-y += current.h diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index b668e351fd6c2e4f7a4b75c8a67eada77449abc9..fca34b2177e28a055663055d01c4fb7d78420285 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm); /* * Needed since we do not use the asm-generic/mm_hooks.h: */ -static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { uml_setup_stubs(mm); + return 0; } extern void arch_exit_mmap(struct mm_struct *mm); static inline void arch_unmap(struct mm_struct *mm, diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 4e6fcb32620ffb2125f648622499e5bf7c950e72..428644175956231aad112a0ce221452913736635 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs) if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x", + printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi), (void *)UPT_IP(regs), (void *)UPT_SP(regs), diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 59b06b48f27d7a4e0d8b82fc147d3fdad7f75295..5c205a9cb5a6a4bb2c865255bc946d7ca4882db1 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -81,9 +81,10 @@ do { \ } \ } while (0) -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, + struct mm_struct *mm) { + return 0; } static inline void arch_unmap(struct mm_struct *mm, diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild index 759a71411169f4df318e3eb3e97e0f7602a04bb0..8611ef980554c2ef81378a087d15d42f1e3f3acb 100644 --- a/arch/unicore32/include/uapi/asm/Kbuild +++ b/arch/unicore32/include/uapi/asm/Kbuild @@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += auxvec.h generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 5f25b39f04d4305dbec925a85dd43a4ed421386d..c4ac6043ebb0fc44e7016d6124ce7ce780adabd3 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -298,7 +298,6 @@ void abort(void) /* if that doesn't kill us, halt */ panic("Oops failed to kill thread"); } -EXPORT_SYMBOL(abort); void __init trap_init(void) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8eed3f94bfc774de5e3f344590f8889a999dea9c..20da391b5f329885e26b30e0351d73d571d28fc1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -55,7 +55,6 @@ config X86 select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_PMEM_API if X86_64 - # Causing hangs/crashes, see the commit that added this change for details. select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_SET_MEMORY @@ -89,6 +88,7 @@ config X86 select GENERIC_CLOCKEVENTS_MIN_ADJUST select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP @@ -429,6 +429,19 @@ config GOLDFISH def_bool y depends on X86_GOLDFISH +config RETPOLINE + bool "Avoid speculative indirect branches in kernel" + default y + help + Compile kernel with the retpoline compiler options to guard against + kernel-to-user data leaks by avoiding speculative indirect + branches. Requires a compiler with -mindirect-branch=thunk-extern + support for full protection. The kernel may run slower. + + Without compiler support, at least indirect branches in assembler + code are eliminated. Since this includes the syscall entry path, + it is not entirely pointless. + config INTEL_RDT bool "Intel Resource Director Technology support" default n @@ -926,7 +939,8 @@ config MAXSMP config NR_CPUS int "Maximum number of CPUs" if SMP && !MAXSMP range 2 8 if SMP && X86_32 && !X86_BIGSMP - range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK + range 2 64 if SMP && X86_32 && X86_BIGSMP + range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64 range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64 default "1" if !SMP default "8192" if MAXSMP diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 6293a8768a9123038eeced9e8dc2c4874feed275..672441c008c73ae3d949b974b128ef167d7d486a 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER config UNWINDER_GUESS bool "Guess unwinder" depends on EXPERT + depends on !STACKDEPOT ---help--- This option enables the "guess" unwinder for unwinding kernel stack traces. It scans the stack and reports every kernel text address it diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 3e73bc255e4eb3edda965a6bc493e0e6ab8de354..fad55160dcb94a28e60d537d3d69d471a1e10e2e 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -230,6 +230,14 @@ KBUILD_CFLAGS += -Wno-sign-compare # KBUILD_CFLAGS += -fno-asynchronous-unwind-tables +# Avoid indirect branches in kernel to deal with Spectre +ifdef CONFIG_RETPOLINE + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) + ifneq ($(RETPOLINE_CFLAGS),) + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE + endif +endif + archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 1e9c322e973af0e1024dc3751b99d16eb76574f7..f25e1530e0644c83d8c69b1a90436c54ce823ae4 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o ifdef CONFIG_X86_64 vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o vmlinux-objs-y += $(obj)/mem_encrypt.o + vmlinux-objs-y += $(obj)/pgtable_64.o endif $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 20919b4f31330fbc36528e47cf7dd010e516c31c..fc313e29fe2c4be637ff7d41bfafdc7a29144ba4 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -305,10 +305,18 @@ ENTRY(startup_64) leaq boot_stack_end(%rbx), %rsp #ifdef CONFIG_X86_5LEVEL - /* Check if 5-level paging has already enabled */ - movq %cr4, %rax - testl $X86_CR4_LA57, %eax - jnz lvl5 + /* + * Check if we need to enable 5-level paging. + * RSI holds real mode data and need to be preserved across + * a function call. + */ + pushq %rsi + call l5_paging_required + popq %rsi + + /* If l5_paging_required() returned zero, we're done here. */ + cmpq $0, %rax + je lvl5 /* * At this point we are in long mode with 4-level paging enabled, diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index b50c42455e25257bff89dd2d9d5f23534340076e..98761a1576ceb5c21b2d8c7e98c1217fd48abb26 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -169,6 +169,16 @@ void __puthex(unsigned long value) } } +static bool l5_supported(void) +{ + /* Check if leaf 7 is supported. */ + if (native_cpuid_eax(0) < 7) + return 0; + + /* Check if la57 is supported. */ + return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)); +} + #if CONFIG_X86_NEED_RELOCS static void handle_relocations(void *output, unsigned long output_len, unsigned long virt_addr) @@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, console_init(); debug_putstr("early console in extract_kernel\n"); + if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) { + error("This linux kernel as configured requires 5-level paging\n" + "This CPU does not support the required 'cr4.la57' feature\n" + "Unable to boot - please use a kernel appropriate for your CPU\n"); + } + free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index d5364ca2e3f9290d0ba36606b7e25369f872c144..b5e5e02f8cde7fa9123dc56981c3a3a98f45843c 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -23,6 +23,9 @@ */ #undef CONFIG_AMD_MEM_ENCRYPT +/* No PAGE_TABLE_ISOLATION support needed either: */ +#undef CONFIG_PAGE_TABLE_ISOLATION + #include "misc.h" /* These actually do the work of building the kernel identity maps. */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c new file mode 100644 index 0000000000000000000000000000000000000000..b4469a37e9a16ff758c92e6159922c9dbb1b7228 --- /dev/null +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -0,0 +1,28 @@ +#include + +/* + * __force_order is used by special_insns.h asm code to force instruction + * serialization. + * + * It is not referenced from the code, but GCC < 5 with -fPIE would fail + * due to an undefined symbol. Define it to make these ancient GCCs work. + */ +unsigned long __force_order; + +int l5_paging_required(void) +{ + /* Check if leaf 7 is supported. */ + + if (native_cpuid_eax(0) < 7) + return 0; + + /* Check if la57 is supported. */ + if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) + return 0; + + /* Check if 5-level paging has already been enabled. */ + if (native_read_cr4() & X86_CR4_LA57) + return 0; + + return 1; +} diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh index 49f4970f693b3bddacad0b2965acad0ae112cb03..6a10d52a41452d941c22e197fa50b4ebd93ba0d5 100644 --- a/arch/x86/boot/genimage.sh +++ b/arch/x86/boot/genimage.sh @@ -44,9 +44,9 @@ FDINITRD=$6 # Make sure the files actually exist verify "$FBZIMAGE" -verify "$MTOOLSRC" genbzdisk() { + verify "$MTOOLSRC" mformat a: syslinux $FIMAGE echo "$KCMDLINE" | mcopy - a:syslinux.cfg @@ -57,6 +57,7 @@ genbzdisk() { } genfdimage144() { + verify "$MTOOLSRC" dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null mformat v: syslinux $FIMAGE @@ -68,6 +69,7 @@ genfdimage144() { } genfdimage288() { + verify "$MTOOLSRC" dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null mformat w: syslinux $FIMAGE @@ -78,39 +80,43 @@ genfdimage288() { mcopy $FBZIMAGE w:linux } -genisoimage() { +geniso() { tmp_dir=`dirname $FIMAGE`/isoimage rm -rf $tmp_dir mkdir $tmp_dir - for i in lib lib64 share end ; do + for i in lib lib64 share ; do for j in syslinux ISOLINUX ; do if [ -f /usr/$i/$j/isolinux.bin ] ; then isolinux=/usr/$i/$j/isolinux.bin - cp $isolinux $tmp_dir fi done for j in syslinux syslinux/modules/bios ; do if [ -f /usr/$i/$j/ldlinux.c32 ]; then ldlinux=/usr/$i/$j/ldlinux.c32 - cp $ldlinux $tmp_dir fi done if [ -n "$isolinux" -a -n "$ldlinux" ] ; then break fi - if [ $i = end -a -z "$isolinux" ] ; then - echo 'Need an isolinux.bin file, please install syslinux/isolinux.' - exit 1 - fi done + if [ -z "$isolinux" ] ; then + echo 'Need an isolinux.bin file, please install syslinux/isolinux.' + exit 1 + fi + if [ -z "$ldlinux" ] ; then + echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.' + exit 1 + fi + cp $isolinux $tmp_dir + cp $ldlinux $tmp_dir cp $FBZIMAGE $tmp_dir/linux echo "$KCMDLINE" > $tmp_dir/isolinux.cfg if [ -f "$FDINITRD" ] ; then cp "$FDINITRD" $tmp_dir/initrd.img fi - mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \ - -c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \ - $tmp_dir + genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \ + -b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \ + -boot-info-table $tmp_dir isohybrid $FIMAGE 2>/dev/null || true rm -rf $tmp_dir } @@ -119,6 +125,6 @@ case $1 in bzdisk) genbzdisk;; fdimage144) genfdimage144;; fdimage288) genfdimage288;; - isoimage) genisoimage;; + isoimage) geniso;; *) echo 'Unknown image format'; exit 1; esac diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 16627fec80b26b211ba3c4b50c3850c134e2bd62..3d09e3aca18dad33ba0c27c3673e49dcbfac0ce8 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -32,6 +32,7 @@ #include #include #include +#include /* * The following macros are used to move an (un)aligned 16 byte value to/from @@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8) pxor INC, STATE4 movdqu IV, 0x30(OUTP) - call *%r11 + CALL_NOSPEC %r11 movdqu 0x00(OUTP), INC pxor INC, STATE1 @@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8) _aesni_gf128mul_x_ble() movups IV, (IVP) - call *%r11 + CALL_NOSPEC %r11 movdqu 0x40(OUTP), INC pxor INC, STATE1 diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index f7c495e2863cb0daecb7b023e19e393cfc32c587..a14af6eb09cb074a1dcf42d0adaaf1c388ceb30b 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -17,6 +17,7 @@ #include #include +#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way: vpxor 14 * 16(%rax), %xmm15, %xmm14; vpxor 15 * 16(%rax), %xmm15, %xmm15; - call *%r9; + CALL_NOSPEC %r9; addq $(16 * 16), %rsp; diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index eee5b3982cfd3c6838a9a34534409df8935eba17..b66bbfa62f50d7c05ddb29b57a322979f64a6999 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -12,6 +12,7 @@ #include #include +#include #define CAMELLIA_TABLE_BYTE_LEN 272 @@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way: vpxor 14 * 32(%rax), %ymm15, %ymm14; vpxor 15 * 32(%rax), %ymm15, %ymm15; - call *%r9; + CALL_NOSPEC %r9; addq $(16 * 32), %rsp; diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 7a7de27c6f415206f2c3b8e5a8a43d4468bc7aaa..d9b734d0c8cc78ecdc3293ca117546eb538a12fc 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -45,6 +45,7 @@ #include #include +#include ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction @@ -172,7 +173,7 @@ continue_block: movzxw (bufp, %rax, 2), len lea crc_array(%rip), bufp lea (bufp, len, 1), bufp - jmp *bufp + JMP_NOSPEC bufp ################################################################ ## 2a) PROCESS FULL BLOCKS: diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index 399a29d067d6367603714633fb8c4de6ab77275a..cb91a64a99e7cdbc0422227383611378fb6b076a 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc, salsa20_ivsetup(ctx, walk.iv); - if (likely(walk.nbytes == nbytes)) - { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, nbytes); - return blkcipher_walk_done(desc, &walk, 0); - } - while (walk.nbytes >= 64) { salsa20_encrypt_bytes(ctx, walk.src.virt.addr, walk.dst.virt.addr, diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 3fd8bc560faece4cb6253e87a4c092965a73e7af..3f48f695d5e6ac6546a009c734fcac517564b24d 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -1,6 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include +#include +#include +#include +#include +#include /* @@ -187,6 +192,148 @@ For 32-bit we have the following conventions - kernel is built with #endif .endm +#ifdef CONFIG_PAGE_TABLE_ISOLATION + +/* + * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two + * halves: + */ +#define PTI_USER_PGTABLE_BIT PAGE_SHIFT +#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT) +#define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT +#define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT) +#define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK) + +.macro SET_NOFLUSH_BIT reg:req + bts $X86_CR3_PCID_NOFLUSH_BIT, \reg +.endm + +.macro ADJUST_KERNEL_CR3 reg:req + ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID + /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */ + andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg +.endm + +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + mov %cr3, \scratch_reg + ADJUST_KERNEL_CR3 \scratch_reg + mov \scratch_reg, %cr3 +.Lend_\@: +.endm + +#define THIS_CPU_user_pcid_flush_mask \ + PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask + +.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + mov %cr3, \scratch_reg + + ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID + + /* + * Test if the ASID needs a flush. + */ + movq \scratch_reg, \scratch_reg2 + andq $(0x7FF), \scratch_reg /* mask ASID */ + bt \scratch_reg, THIS_CPU_user_pcid_flush_mask + jnc .Lnoflush_\@ + + /* Flush needed, clear the bit */ + btr \scratch_reg, THIS_CPU_user_pcid_flush_mask + movq \scratch_reg2, \scratch_reg + jmp .Lwrcr3_pcid_\@ + +.Lnoflush_\@: + movq \scratch_reg2, \scratch_reg + SET_NOFLUSH_BIT \scratch_reg + +.Lwrcr3_pcid_\@: + /* Flip the ASID to the user version */ + orq $(PTI_USER_PCID_MASK), \scratch_reg + +.Lwrcr3_\@: + /* Flip the PGD to the user version */ + orq $(PTI_USER_PGTABLE_MASK), \scratch_reg + mov \scratch_reg, %cr3 +.Lend_\@: +.endm + +.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req + pushq %rax + SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax + popq %rax +.endm + +.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req + ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI + movq %cr3, \scratch_reg + movq \scratch_reg, \save_reg + /* + * Test the user pagetable bit. If set, then the user page tables + * are active. If clear CR3 already has the kernel page table + * active. + */ + bt $PTI_USER_PGTABLE_BIT, \scratch_reg + jnc .Ldone_\@ + + ADJUST_KERNEL_CR3 \scratch_reg + movq \scratch_reg, %cr3 + +.Ldone_\@: +.endm + +.macro RESTORE_CR3 scratch_reg:req save_reg:req + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI + + ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID + + /* + * KERNEL pages can always resume with NOFLUSH as we do + * explicit flushes. + */ + bt $PTI_USER_PGTABLE_BIT, \save_reg + jnc .Lnoflush_\@ + + /* + * Check if there's a pending flush for the user ASID we're + * about to set. + */ + movq \save_reg, \scratch_reg + andq $(0x7FF), \scratch_reg + bt \scratch_reg, THIS_CPU_user_pcid_flush_mask + jnc .Lnoflush_\@ + + btr \scratch_reg, THIS_CPU_user_pcid_flush_mask + jmp .Lwrcr3_\@ + +.Lnoflush_\@: + SET_NOFLUSH_BIT \save_reg + +.Lwrcr3_\@: + /* + * The CR3 write could be avoided when not changing its value, + * but would require a CR3 read *and* a scratch register. + */ + movq \save_reg, %cr3 +.Lend_\@: +.endm + +#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */ + +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req +.endm +.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req +.endm +.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req +.endm +.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req +.endm +.macro RESTORE_CR3 scratch_reg:req save_reg:req +.endm + +#endif + #endif /* CONFIG_X86_64 */ /* diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 4838037f97f6edffda62b5b045c837fcc29402f0..a1f28a54f23a42e85436ed607293bea14486391c 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -44,6 +44,7 @@ #include #include #include +#include .section .entry.text, "ax" @@ -290,7 +291,7 @@ ENTRY(ret_from_fork) /* kernel thread */ 1: movl %edi, %eax - call *%ebx + CALL_NOSPEC %ebx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() @@ -919,7 +920,7 @@ common_exception: movl %ecx, %es TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer - call *%edi + CALL_NOSPEC %edi jmp ret_from_exception END(common_exception) @@ -941,9 +942,10 @@ ENTRY(debug) movl %esp, %eax # pt_regs pointer /* Are we currently on the SYSENTER stack? */ - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) - subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ - cmpl $SIZEOF_SYSENTER_stack, %ecx + movl PER_CPU_VAR(cpu_entry_area), %ecx + addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx + subl %eax, %ecx /* ecx = (end of entry_stack) - esp */ + cmpl $SIZEOF_entry_stack, %ecx jb .Ldebug_from_sysenter_stack TRACE_IRQS_OFF @@ -984,9 +986,10 @@ ENTRY(nmi) movl %esp, %eax # pt_regs pointer /* Are we currently on the SYSENTER stack? */ - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) - subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ - cmpl $SIZEOF_SYSENTER_stack, %ecx + movl PER_CPU_VAR(cpu_entry_area), %ecx + addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx + subl %eax, %ecx /* ecx = (end of entry_stack) - esp */ + cmpl $SIZEOF_entry_stack, %ecx jb .Lnmi_from_sysenter_stack /* Not on SYSENTER stack. */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f81d50d7ceacdefa06d61482687937096c68421c..4f8e1d35a97ca7071a11f33f07e2b87c2eac7acf 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -23,7 +23,6 @@ #include #include #include -#include "calling.h" #include #include #include @@ -38,8 +37,11 @@ #include #include #include +#include #include +#include "calling.h" + .code64 .section .entry.text, "ax" @@ -140,6 +142,67 @@ END(native_usergs_sysret64) * with them due to bugs in both AMD and Intel CPUs. */ + .pushsection .entry_trampoline, "ax" + +/* + * The code in here gets remapped into cpu_entry_area's trampoline. This means + * that the assembler and linker have the wrong idea as to where this code + * lives (and, in fact, it's mapped more than once, so it's not even at a + * fixed address). So we can't reference any symbols outside the entry + * trampoline and expect it to work. + * + * Instead, we carefully abuse %rip-relative addressing. + * _entry_trampoline(%rip) refers to the start of the remapped) entry + * trampoline. We can thus find cpu_entry_area with this macro: + */ + +#define CPU_ENTRY_AREA \ + _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) + +/* The top word of the SYSENTER stack is hot and is usable as scratch space. */ +#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \ + SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA + +ENTRY(entry_SYSCALL_64_trampoline) + UNWIND_HINT_EMPTY + swapgs + + /* Stash the user RSP. */ + movq %rsp, RSP_SCRATCH + + /* Note: using %rsp as a scratch reg. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + + /* Load the top of the task stack into RSP */ + movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp + + /* Start building the simulated IRET frame. */ + pushq $__USER_DS /* pt_regs->ss */ + pushq RSP_SCRATCH /* pt_regs->sp */ + pushq %r11 /* pt_regs->flags */ + pushq $__USER_CS /* pt_regs->cs */ + pushq %rcx /* pt_regs->ip */ + + /* + * x86 lacks a near absolute jump, and we can't jump to the real + * entry text with a relative jump. We could push the target + * address and then use retq, but this destroys the pipeline on + * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead, + * spill RDI and restore it in a second-stage trampoline. + */ + pushq %rdi + movq $entry_SYSCALL_64_stage2, %rdi + JMP_NOSPEC %rdi +END(entry_SYSCALL_64_trampoline) + + .popsection + +ENTRY(entry_SYSCALL_64_stage2) + UNWIND_HINT_EMPTY + popq %rdi + jmp entry_SYSCALL_64_after_hwframe +END(entry_SYSCALL_64_stage2) + ENTRY(entry_SYSCALL_64) UNWIND_HINT_EMPTY /* @@ -149,6 +212,10 @@ ENTRY(entry_SYSCALL_64) */ swapgs + /* + * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it + * is not required to switch CR3. + */ movq %rsp, PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp @@ -204,7 +271,12 @@ entry_SYSCALL_64_fastpath: * It might end up jumping to the slow path. If it jumps, RAX * and all argument registers are clobbered. */ +#ifdef CONFIG_RETPOLINE + movq sys_call_table(, %rax, 8), %rax + call __x86_indirect_thunk_rax +#else call *sys_call_table(, %rax, 8) +#endif .Lentry_SYSCALL_64_after_fastpath_call: movq %rax, RAX(%rsp) @@ -330,8 +402,25 @@ syscall_return_via_sysret: popq %rsi /* skip rcx */ popq %rdx popq %rsi + + /* + * Now all regs are restored except RSP and RDI. + * Save old stack pointer and switch to trampoline stack. + */ + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + + pushq RSP-RDI(%rdi) /* RSP */ + pushq (%rdi) /* RDI */ + + /* + * We are on the trampoline stack. All regs except RDI are live. + * We can do future final exit work right here. + */ + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi + popq %rdi - movq RSP-ORIG_RAX(%rsp), %rsp + popq %rsp USERGS_SYSRET64 END(entry_SYSCALL_64) @@ -359,7 +448,7 @@ ENTRY(stub_ptregs_64) jmp entry_SYSCALL64_slow_path 1: - jmp *%rax /* Called from C */ + JMP_NOSPEC %rax /* Called from C */ END(stub_ptregs_64) .macro ptregs_stub func @@ -438,7 +527,7 @@ ENTRY(ret_from_fork) 1: /* kernel thread */ movq %r12, %rdi - call *%rbx + CALL_NOSPEC %rbx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() @@ -466,12 +555,13 @@ END(irq_entries_start) .macro DEBUG_ENTRY_ASSERT_IRQS_OFF #ifdef CONFIG_DEBUG_ENTRY - pushfq - testl $X86_EFLAGS_IF, (%rsp) + pushq %rax + SAVE_FLAGS(CLBR_RAX) + testl $X86_EFLAGS_IF, %eax jz .Lokay_\@ ud2 .Lokay_\@: - addq $8, %rsp + popq %rax #endif .endm @@ -563,6 +653,13 @@ END(irq_entries_start) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func cld + + testb $3, CS-ORIG_RAX(%rsp) + jz 1f + SWAPGS + call switch_to_thread_stack +1: + ALLOC_PT_GPREGS_ON_STACK SAVE_C_REGS SAVE_EXTRA_REGS @@ -572,12 +669,8 @@ END(irq_entries_start) jz 1f /* - * IRQ from user mode. Switch to kernel gsbase and inform context - * tracking that we're in kernel mode. - */ - SWAPGS - - /* + * IRQ from user mode. + * * We need to tell lockdep that IRQs are off. We can't do this until * we fix gsbase, and we should do it before enter_from_user_mode * (which can take locks). Since TRACE_IRQS_OFF idempotent, @@ -630,10 +723,43 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) ud2 1: #endif - SWAPGS POP_EXTRA_REGS - POP_C_REGS - addq $8, %rsp /* skip regs->orig_ax */ + popq %r11 + popq %r10 + popq %r9 + popq %r8 + popq %rax + popq %rcx + popq %rdx + popq %rsi + + /* + * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS. + * Save old stack pointer and switch to trampoline stack. + */ + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + + /* Copy the IRET frame to the trampoline stack. */ + pushq 6*8(%rdi) /* SS */ + pushq 5*8(%rdi) /* RSP */ + pushq 4*8(%rdi) /* EFLAGS */ + pushq 3*8(%rdi) /* CS */ + pushq 2*8(%rdi) /* RIP */ + + /* Push user RDI on the trampoline stack. */ + pushq (%rdi) + + /* + * We are on the trampoline stack. All regs except RDI are live. + * We can do future final exit work right here. + */ + + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi + + /* Restore RDI. */ + popq %rdi + SWAPGS INTERRUPT_RETURN @@ -713,7 +839,9 @@ native_irq_return_ldt: */ pushq %rdi /* Stash user RDI */ - SWAPGS + SWAPGS /* to kernel GS */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ + movq PER_CPU_VAR(espfix_waddr), %rdi movq %rax, (0*8)(%rdi) /* user RAX */ movq (1*8)(%rsp), %rax /* user RIP */ @@ -729,7 +857,6 @@ native_irq_return_ldt: /* Now RAX == RSP. */ andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */ - popq %rdi /* Restore user RDI */ /* * espfix_stack[31:16] == 0. The page tables are set up such that @@ -740,7 +867,11 @@ native_irq_return_ldt: * still points to an RO alias of the ESPFIX stack. */ orq PER_CPU_VAR(espfix_stack), %rax - SWAPGS + + SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi + SWAPGS /* to user GS */ + popq %rdi /* Restore user RDI */ + movq %rax, %rsp UNWIND_HINT_IRET_REGS offset=8 @@ -829,7 +960,35 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt /* * Exception entry points. */ -#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) +#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) + +/* + * Switch to the thread stack. This is called with the IRET frame and + * orig_ax on the stack. (That is, RDI..R12 are not on the stack and + * space has not been allocated for them.) + */ +ENTRY(switch_to_thread_stack) + UNWIND_HINT_FUNC + + pushq %rdi + /* Need to switch before accessing the thread stack. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI + + pushq 7*8(%rdi) /* regs->ss */ + pushq 6*8(%rdi) /* regs->rsp */ + pushq 5*8(%rdi) /* regs->eflags */ + pushq 4*8(%rdi) /* regs->cs */ + pushq 3*8(%rdi) /* regs->ip */ + pushq 2*8(%rdi) /* regs->orig_ax */ + pushq 8(%rdi) /* return address */ + UNWIND_HINT_FUNC + + movq (%rdi), %rdi + ret +END(switch_to_thread_stack) .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) @@ -848,11 +1007,12 @@ ENTRY(\sym) ALLOC_PT_GPREGS_ON_STACK - .if \paranoid - .if \paranoid == 1 + .if \paranoid < 2 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ - jnz 1f + jnz .Lfrom_usermode_switch_stack_\@ .endif + + .if \paranoid call paranoid_entry .else call error_entry @@ -894,20 +1054,15 @@ ENTRY(\sym) jmp error_exit .endif - .if \paranoid == 1 + .if \paranoid < 2 /* - * Paranoid entry from userspace. Switch stacks and treat it + * Entry from userspace. Switch stacks and treat it * as a normal entry. This means that paranoid handlers * run in real process context if user_mode(regs). */ -1: +.Lfrom_usermode_switch_stack_\@: call error_entry - - movq %rsp, %rdi /* pt_regs pointer */ - call sync_regs - movq %rax, %rsp /* switch stack */ - movq %rsp, %rdi /* pt_regs pointer */ .if \has_error_code @@ -1119,7 +1274,11 @@ ENTRY(paranoid_entry) js 1f /* negative -> in kernel */ SWAPGS xorl %ebx, %ebx -1: ret + +1: + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 + + ret END(paranoid_entry) /* @@ -1141,6 +1300,7 @@ ENTRY(paranoid_exit) testl %ebx, %ebx /* swapgs needed? */ jnz .Lparanoid_exit_no_swapgs TRACE_IRQS_IRETQ + RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: @@ -1168,8 +1328,18 @@ ENTRY(error_entry) * from user mode due to an IRET fault. */ SWAPGS + /* We have user CR3. Change to kernel CR3. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax .Lerror_entry_from_usermode_after_swapgs: + /* Put us onto the real thread stack. */ + popq %r12 /* save return addr in %12 */ + movq %rsp, %rdi /* arg0 = pt_regs pointer */ + call sync_regs + movq %rax, %rsp /* switch stack */ + ENCODE_FRAME_POINTER + pushq %r12 + /* * We need to tell lockdep that IRQs are off. We can't do this until * we fix gsbase, and we should do it before enter_from_user_mode @@ -1206,6 +1376,7 @@ ENTRY(error_entry) * .Lgs_change's error handler with kernel gsbase. */ SWAPGS + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax jmp .Lerror_entry_done .Lbstep_iret: @@ -1215,10 +1386,11 @@ ENTRY(error_entry) .Lerror_bad_iret: /* - * We came from an IRET to user mode, so we have user gsbase. - * Switch to kernel gsbase: + * We came from an IRET to user mode, so we have user + * gsbase and CR3. Switch to kernel gsbase and CR3: */ SWAPGS + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax /* * Pretend that the exception came from user mode: set up pt_regs @@ -1250,6 +1422,10 @@ END(error_exit) /* * Runs on exception stack. Xen PV does not go through this path at all, * so we can use real assembly here. + * + * Registers: + * %r14: Used to save/restore the CR3 of the interrupted context + * when PAGE_TABLE_ISOLATION is in use. Do not clobber. */ ENTRY(nmi) UNWIND_HINT_IRET_REGS @@ -1313,6 +1489,7 @@ ENTRY(nmi) swapgs cld + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx movq %rsp, %rdx movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp UNWIND_HINT_IRET_REGS base=%rdx offset=8 @@ -1565,6 +1742,8 @@ end_repeat_nmi: movq $-1, %rsi call do_nmi + RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 + testl %ebx, %ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 568e130d932cd2a7d44393e5fc52408cffe64f34..98d5358e4041a7e144ec566f7db19ff054cedbcc 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -48,7 +48,11 @@ */ ENTRY(entry_SYSENTER_compat) /* Interrupts are off on entry. */ - SWAPGS_UNSAFE_STACK + SWAPGS + + /* We are about to clobber %rsp anyway, clobbering here is OK */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* @@ -186,8 +190,13 @@ ENTRY(entry_SYSCALL_compat) /* Interrupts are off on entry. */ swapgs - /* Stash user ESP and switch to the kernel stack. */ + /* Stash user ESP */ movl %esp, %r8d + + /* Use %rsp as scratch reg. User ESP is stashed in r8 */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp + + /* Switch to the kernel stack */ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp /* Construct struct pt_regs on stack */ @@ -256,10 +265,22 @@ sysret32_from_system_call: * when the system call started, which is already known to user * code. We zero R8-R10 to avoid info leaks. */ + movq RSP-ORIG_RAX(%rsp), %rsp + + /* + * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored + * on the process stack which is not mapped to userspace and + * not readable after we SWITCH_TO_USER_CR3. Delay the CR3 + * switch until after after the last reference to the process + * stack. + * + * %r8/%r9 are zeroed before the sysret, thus safe to clobber. + */ + SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 + xorq %r8, %r8 xorq %r9, %r9 xorq %r10, %r10 - movq RSP-ORIG_RAX(%rsp), %rsp swapgs sysretl END(entry_SYSCALL_compat) @@ -306,8 +327,11 @@ ENTRY(entry_INT80_compat) */ movl %eax, %eax - /* Construct struct pt_regs on stack (iret frame is already on stack) */ pushq %rax /* pt_regs->orig_ax */ + + /* switch to thread stack expects orig_ax to be pushed */ + call switch_to_thread_stack + pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ pushq %rdx /* pt_regs->dx */ diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 11b13c4b43d55f8d6c8b239f478ecb302d4cfd07..f19856d95c60919c92d1679e0037d9339c4c2a65 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -324,5 +324,5 @@ notrace time_t __vdso_time(time_t *t) *t = result; return result; } -int time(time_t *t) +time_t time(time_t *t) __attribute__((weak, alias("__vdso_time"))); diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index f279ba2643dc8933b9659242082e7ef2ea2d9dd6..577fa8adb785baf5ea1c993a2bbc88adf43fbbcc 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -37,6 +37,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" @@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) WARN_ON_ONCE(address != regs->ip); + /* This should be unreachable in NATIVE mode. */ + if (WARN_ON(vsyscall_mode == NATIVE)) + return false; + if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); @@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr) return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR; } +/* + * The VSYSCALL page is the only user-accessible page in the kernel address + * range. Normally, the kernel page tables can have _PAGE_USER clear, but + * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls + * are enabled. + * + * Some day we may create a "minimal" vsyscall mode in which we emulate + * vsyscalls but leave the page not present. If so, we skip calling + * this. + */ +void __init set_vsyscall_pgtable_user_bits(pgd_t *root) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_pgd(root, VSYSCALL_ADDR); + set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); + p4d = p4d_offset(pgd, VSYSCALL_ADDR); +#if CONFIG_PGTABLE_LEVELS >= 5 + p4d->p4d |= _PAGE_USER; +#endif + pud = pud_offset(p4d, VSYSCALL_ADDR); + set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER)); + pmd = pmd_offset(pud, VSYSCALL_ADDR); + set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER)); +} + void __init map_vsyscall(void) { extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - if (vsyscall_mode != NONE) + if (vsyscall_mode != NONE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, vsyscall_mode == NATIVE ? PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR); + set_vsyscall_pgtable_user_bits(swapper_pg_dir); + } BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 141e07b0621689e745582599c009f4af1d053c6c..24ffa1e88cf948ecbe9839f6d956c69a12ecd4f0 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -582,6 +582,24 @@ static __init int bts_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) return -ENODEV; + if (boot_cpu_has(X86_FEATURE_PTI)) { + /* + * BTS hardware writes through a virtual memory map we must + * either use the kernel physical map, or the user mapping of + * the AUX buffer. + * + * However, since this driver supports per-CPU and per-task inherit + * we cannot use the user mapping since it will not be availble + * if we're not running the owning process. + * + * With PTI we can't use the kernal map either, because its not + * there when we run userspace. + * + * For now, disable this driver when using PTI. + */ + return -ENODEV; + } + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | PERF_PMU_CAP_EXCLUSIVE; bts_pmu.task_ctx_nr = perf_sw_context; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 09c26a4f139c125e000675689ebc983acd8ab91a..731153a4681e73f761dea8c0c15ce6757b89860e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = { __init int intel_pmu_init(void) { + struct attribute **extra_attr = NULL; + struct attribute **to_free = NULL; union cpuid10_edx edx; union cpuid10_eax eax; union cpuid10_ebx ebx; @@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void) unsigned int unused; struct extra_reg *er; int version, i; - struct attribute **extra_attr = NULL; char *name; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { @@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void) extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; extra_attr = merge_attr(extra_attr, skl_format_attr); + to_free = extra_attr; x86_pmu.cpu_events = get_hsw_events_attrs(); intel_pmu_pebs_data_source_skl( boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); @@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void) pr_cont("full-width counters, "); } + kfree(to_free); return 0; } diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 3674a4b6f8bd0c5f12223b8f5c16067a933450df..8156e47da7ba4c5a31e9f9436b27a96cfb7da4c3 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -3,16 +3,19 @@ #include #include +#include #include +#include #include #include "../perf_event.h" +/* Waste a full page so it can be mapped into the cpu_entry_area */ +DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store); + /* The size of a BTS record in bytes: */ #define BTS_RECORD_SIZE 24 -#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) -#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) #define PEBS_FIXUP_SIZE PAGE_SIZE /* @@ -279,17 +282,67 @@ void fini_debug_store_on_cpu(int cpu) static DEFINE_PER_CPU(void *, insn_buffer); -static int alloc_pebs_buffer(int cpu) +static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + unsigned long start = (unsigned long)cea; + phys_addr_t pa; + size_t msz = 0; + + pa = virt_to_phys(addr); + + preempt_disable(); + for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE) + cea_set_pte(cea, pa, prot); + + /* + * This is a cross-CPU update of the cpu_entry_area, we must shoot down + * all TLB entries for it. + */ + flush_tlb_kernel_range(start, start + size); + preempt_enable(); +} + +static void ds_clear_cea(void *cea, size_t size) +{ + unsigned long start = (unsigned long)cea; + size_t msz = 0; + + preempt_disable(); + for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE) + cea_set_pte(cea, 0, PAGE_NONE); + + flush_tlb_kernel_range(start, start + size); + preempt_enable(); +} + +static void *dsalloc_pages(size_t size, gfp_t flags, int cpu) +{ + unsigned int order = get_order(size); int node = cpu_to_node(cpu); - int max; - void *buffer, *ibuffer; + struct page *page; + + page = __alloc_pages_node(node, flags | __GFP_ZERO, order); + return page ? page_address(page) : NULL; +} + +static void dsfree_pages(const void *buffer, size_t size) +{ + if (buffer) + free_pages((unsigned long)buffer, get_order(size)); +} + +static int alloc_pebs_buffer(int cpu) +{ + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + size_t bsiz = x86_pmu.pebs_buffer_size; + int max, node = cpu_to_node(cpu); + void *buffer, *ibuffer, *cea; if (!x86_pmu.pebs) return 0; - buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node); + buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); if (unlikely(!buffer)) return -ENOMEM; @@ -300,25 +353,27 @@ static int alloc_pebs_buffer(int cpu) if (x86_pmu.intel_cap.pebs_format < 2) { ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); if (!ibuffer) { - kfree(buffer); + dsfree_pages(buffer, bsiz); return -ENOMEM; } per_cpu(insn_buffer, cpu) = ibuffer; } - - max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size; - - ds->pebs_buffer_base = (u64)(unsigned long)buffer; + hwev->ds_pebs_vaddr = buffer; + /* Update the cpu entry area mapping */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds->pebs_buffer_base = (unsigned long) cea; + ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL); ds->pebs_index = ds->pebs_buffer_base; - ds->pebs_absolute_maximum = ds->pebs_buffer_base + - max * x86_pmu.pebs_record_size; - + max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size); + ds->pebs_absolute_maximum = ds->pebs_buffer_base + max; return 0; } static void release_pebs_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *cea; if (!ds || !x86_pmu.pebs) return; @@ -326,73 +381,70 @@ static void release_pebs_buffer(int cpu) kfree(per_cpu(insn_buffer, cpu)); per_cpu(insn_buffer, cpu) = NULL; - kfree((void *)(unsigned long)ds->pebs_buffer_base); + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer; + ds_clear_cea(cea, x86_pmu.pebs_buffer_size); ds->pebs_buffer_base = 0; + dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size); + hwev->ds_pebs_vaddr = NULL; } static int alloc_bts_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - int node = cpu_to_node(cpu); - int max, thresh; - void *buffer; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *buffer, *cea; + int max; if (!x86_pmu.bts) return 0; - buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); + buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu); if (unlikely(!buffer)) { WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); return -ENOMEM; } - - max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; - thresh = max / 16; - - ds->bts_buffer_base = (u64)(unsigned long)buffer; + hwev->ds_bts_vaddr = buffer; + /* Update the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; + ds->bts_buffer_base = (unsigned long) cea; + ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL); ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = ds->bts_buffer_base + - max * BTS_RECORD_SIZE; - ds->bts_interrupt_threshold = ds->bts_absolute_maximum - - thresh * BTS_RECORD_SIZE; - + max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE); + ds->bts_absolute_maximum = ds->bts_buffer_base + max; + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16); return 0; } static void release_bts_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); + struct debug_store *ds = hwev->ds; + void *cea; if (!ds || !x86_pmu.bts) return; - kfree((void *)(unsigned long)ds->bts_buffer_base); + /* Clear the fixmap */ + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer; + ds_clear_cea(cea, BTS_BUFFER_SIZE); ds->bts_buffer_base = 0; + dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE); + hwev->ds_bts_vaddr = NULL; } static int alloc_ds_buffer(int cpu) { - int node = cpu_to_node(cpu); - struct debug_store *ds; - - ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node); - if (unlikely(!ds)) - return -ENOMEM; + struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store; + memset(ds, 0, sizeof(*ds)); per_cpu(cpu_hw_events, cpu).ds = ds; - return 0; } static void release_ds_buffer(int cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - return; - per_cpu(cpu_hw_events, cpu).ds = NULL; - kfree(ds); } void release_ds_buffers(void) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index f7aaadf9331fb75587e74a42b041d78b2a014fc0..8e4ea143ed96403d275bf6727801961db9a053d7 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -14,6 +14,8 @@ #include +#include + /* To enable MSR tracing please use the generic trace points. */ /* @@ -77,8 +79,6 @@ struct amd_nb { struct event_constraint event_constraints[X86_PMC_IDX_MAX]; }; -/* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS 8 #define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1) /* @@ -95,23 +95,6 @@ struct amd_nb { PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) -/* - * A debug store configuration. - * - * We only support architectures that use 64bit fields. - */ -struct debug_store { - u64 bts_buffer_base; - u64 bts_index; - u64 bts_absolute_maximum; - u64 bts_interrupt_threshold; - u64 pebs_buffer_base; - u64 pebs_index; - u64 pebs_absolute_maximum; - u64 pebs_interrupt_threshold; - u64 pebs_event_reset[MAX_PEBS_EVENTS]; -}; - #define PEBS_REGS \ (PERF_REG_X86_AX | \ PERF_REG_X86_BX | \ @@ -216,6 +199,8 @@ struct cpu_hw_events { * Intel DebugStore bits */ struct debug_store *ds; + void *ds_pebs_vaddr; + void *ds_bts_vaddr; u64 pebs_enabled; int n_pebs; int n_large_pebs; diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index dbfd0854651fe4fa13154e3ce04e9baa1f643637..cf5961ca867746ae2eadb4a2b6f2c9068cec48c4 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -140,7 +140,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ - ".popsection" + ".popsection\n" #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ OLDINSTR_2(oldinstr, 1, 2) \ @@ -151,7 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ - ".popsection" + ".popsection\n" /* * Alternative instructions for different CPU types or capabilities. diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index ff700d81e91efcf89a2f2ac725b3e80d868c8621..0927cdc4f9460165a9159b1c4f7bea0e759bdbb9 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -11,7 +11,32 @@ #include #include #include +#include #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); #endif + +#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_X86_32 +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); +#else +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); +INDIRECT_THUNK(8) +INDIRECT_THUNK(9) +INDIRECT_THUNK(10) +INDIRECT_THUNK(11) +INDIRECT_THUNK(12) +INDIRECT_THUNK(13) +INDIRECT_THUNK(14) +INDIRECT_THUNK(15) +#endif +INDIRECT_THUNK(ax) +INDIRECT_THUNK(bx) +INDIRECT_THUNK(cx) +INDIRECT_THUNK(dx) +INDIRECT_THUNK(si) +INDIRECT_THUNK(di) +INDIRECT_THUNK(bp) +INDIRECT_THUNK(sp) +#endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 219faaec51dfa192f69d8893c8844219c0c89029..386a6900e206f6578e3b38ee7f085d36ac50a928 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -136,6 +136,7 @@ #endif #ifndef __ASSEMBLY__ +#ifndef __BPF__ /* * This output constraint should be used for any inline asm which has a "call" * instruction. Otherwise the asm may be inserted before the frame pointer @@ -145,5 +146,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP); #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif +#endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h new file mode 100644 index 0000000000000000000000000000000000000000..4a7884b8dca55bc58f077a90ad93d6398bddd053 --- /dev/null +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _ASM_X86_CPU_ENTRY_AREA_H +#define _ASM_X86_CPU_ENTRY_AREA_H + +#include +#include +#include + +/* + * cpu_entry_area is a percpu region that contains things needed by the CPU + * and early entry/exit code. Real types aren't used for all fields here + * to avoid circular header dependencies. + * + * Every field is a virtual alias of some other allocated backing store. + * There is no direct allocation of a struct cpu_entry_area. + */ +struct cpu_entry_area { + char gdt[PAGE_SIZE]; + + /* + * The GDT is just below entry_stack and thus serves (on x86_64) as + * a a read-only guard page. + */ + struct entry_stack_page entry_stack_page; + + /* + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because + * we need task switches to work, and task switches write to the TSS. + */ + struct tss_struct tss; + + char entry_trampoline[PAGE_SIZE]; + +#ifdef CONFIG_X86_64 + /* + * Exception stacks used for IST entries. + * + * In the future, this should have a separate slot for each stack + * with guard pages between them. + */ + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; +#endif +#ifdef CONFIG_CPU_SUP_INTEL + /* + * Per CPU debug store for Intel performance monitoring. Wastes a + * full page at the moment. + */ + struct debug_store cpu_debug_store; + /* + * The actual PEBS/BTS buffers must be mapped to user space + * Reserve enough fixmap PTEs. + */ + struct debug_store_buffers cpu_debug_buffers; +#endif +}; + +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) +#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) + +DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); + +extern void setup_cpu_entry_areas(void); +extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); + +#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE +#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) + +#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) + +#define CPU_ENTRY_AREA_MAP_SIZE \ + (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE) + +extern struct cpu_entry_area *get_cpu_entry_area(int cpu); + +static inline struct entry_stack *cpu_entry_stack(int cpu) +{ + return &get_cpu_entry_area(cpu)->entry_stack_page.stack; +} + +#endif diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bf6a76202a779ee131b4df8c89449ab52abd0a79..ea9a7dde62e5c4d551ba89e429f911fb5c6603fd 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); set_bit(bit, (unsigned long *)cpu_caps_set); \ } while (0) +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) + #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c0b0e9e8aa66eb645eba71784e80aa93b0f0df79..f275447862f4583909d14b6e2469c16e4a81b951 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -197,11 +197,14 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ - +#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ @@ -266,6 +269,7 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -339,5 +343,8 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 4011cb03ef08e52db15f52779ce366c26359a34b..13c5ee878a477902b8494532b24853b6c156a170 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -20,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in desc->type = (info->read_exec_only ^ 1) << 1; desc->type |= info->contents << 2; + /* Set the ACCESS bit so it can be mapped RO */ + desc->type |= 1; desc->s = 1; desc->dpl = 0x3; @@ -60,17 +63,10 @@ static inline struct desc_struct *get_current_gdt_rw(void) return this_cpu_ptr(&gdt_page)->gdt; } -/* Get the fixmap index for a specific processor */ -static inline unsigned int get_cpu_gdt_ro_index(int cpu) -{ - return FIX_GDT_REMAP_BEGIN + cpu; -} - /* Provide the fixmap address of the remapped GDT */ static inline struct desc_struct *get_cpu_gdt_ro(int cpu) { - unsigned int idx = get_cpu_gdt_ro_index(cpu); - return (struct desc_struct *)__fix_to_virt(idx); + return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt; } /* Provide the current read-only GDT */ @@ -185,7 +181,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, #endif } -static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) +static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr) { struct desc_struct *d = get_cpu_gdt_rw(cpu); tss_desc tss; diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 14d6d50073142b0f49b06850ccd0d394546479ee..b027633e73003e121d7c043438ac7dbd10fc07a4 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -50,6 +50,12 @@ # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) #endif +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define DISABLE_PTI 0 +#else +# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -60,7 +66,7 @@ #define DISABLED_MASK4 (DISABLE_PCID) #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 -#define DISABLED_MASK7 0 +#define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_MPX) #define DISABLED_MASK10 0 diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h index 0211029076ea8b9ed6648b9bf298c99c8b2124ad..6777480d8a427eaaa07559f77985c125aa66bb6c 100644 --- a/arch/x86/include/asm/espfix.h +++ b/arch/x86/include/asm/espfix.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_ESPFIX_H #define _ASM_X86_ESPFIX_H -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_ESPFIX64 #include @@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); extern void init_espfix_bsp(void); extern void init_espfix_ap(int cpu); - -#endif /* CONFIG_X86_64 */ +#else +static inline void init_espfix_ap(int cpu) { } +#endif #endif /* _ASM_X86_ESPFIX_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index b0c505fe9a958c701fef6d96f281bb8ab1a773de..64c4a30e0d39621ff8587fc8da538cd3d1d9f144 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP; PAGE_SIZE) #endif - /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at @@ -84,7 +83,6 @@ enum fixed_addresses { FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, #endif - FIX_RO_IDT, /* Virtual mapping for read-only IDT */ #ifdef CONFIG_X86_32 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, @@ -100,9 +98,6 @@ enum fixed_addresses { #ifdef CONFIG_X86_INTEL_MID FIX_LNW_VRTC, #endif - /* Fixmap entries to remap the GDTs, one per processor. */ - FIX_GDT_REMAP_BEGIN, - FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1, #ifdef CONFIG_ACPI_APEI_GHES /* Used for GHES mapping from assorted contexts */ @@ -143,7 +138,7 @@ enum fixed_addresses { extern void reserve_top_address(unsigned long reserve); #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) extern int fixmaps_set; diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 1b0a5abcd8aeb6e700013c5434aaeb0bba7a152f..96aa6b9884dc5b3bc8d54c9ef1c6258eea13a0d0 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -20,16 +20,7 @@ #ifndef _ASM_X86_HYPERVISOR_H #define _ASM_X86_HYPERVISOR_H -#ifdef CONFIG_HYPERVISOR_GUEST - -#include -#include -#include - -/* - * x86 hypervisor information - */ - +/* x86 hypervisor types */ enum x86_hypervisor_type { X86_HYPER_NATIVE = 0, X86_HYPER_VMWARE, @@ -39,6 +30,12 @@ enum x86_hypervisor_type { X86_HYPER_KVM, }; +#ifdef CONFIG_HYPERVISOR_GUEST + +#include +#include +#include + struct hypervisor_x86 { /* Hypervisor name */ const char *name; @@ -58,7 +55,15 @@ struct hypervisor_x86 { extern enum x86_hypervisor_type x86_hyper_type; extern void init_hypervisor_platform(void); +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) +{ + return x86_hyper_type == type; +} #else static inline void init_hypervisor_platform(void) { } +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) +{ + return type == X86_HYPER_NATIVE; +} #endif /* CONFIG_HYPERVISOR_GUEST */ #endif /* _ASM_X86_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h new file mode 100644 index 0000000000000000000000000000000000000000..62a9f4966b4298ec2f3aa6c32d0881bd4296956c --- /dev/null +++ b/arch/x86/include/asm/intel_ds.h @@ -0,0 +1,36 @@ +#ifndef _ASM_INTEL_DS_H +#define _ASM_INTEL_DS_H + +#include + +#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) +#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) + +/* The maximal number of PEBS events: */ +#define MAX_PEBS_EVENTS 8 + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. + */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_event_reset[MAX_PEBS_EVENTS]; +} __aligned(PAGE_SIZE); + +DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store); + +struct debug_store_buffers { + char bts_buffer[BTS_BUFFER_SIZE]; + char pebs_buffer[PEBS_BUFFER_SIZE]; +}; + +#endif diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h new file mode 100644 index 0000000000000000000000000000000000000000..989cfa86de85184359e0205de9237f9e048e5e20 --- /dev/null +++ b/arch/x86/include/asm/invpcid.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INVPCID +#define _ASM_X86_INVPCID + +static inline void __invpcid(unsigned long pcid, unsigned long addr, + unsigned long type) +{ + struct { u64 d[2]; } desc = { { pcid, addr } }; + + /* + * The memory clobber is because the whole point is to invalidate + * stale TLB entries and, especially if we're flushing global + * mappings, we don't want the compiler to reorder any subsequent + * memory accesses before the TLB flush. + * + * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and + * invpcid (%rcx), %rax in long mode. + */ + asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" + : : "m" (desc), "a" (type), "c" (&desc) : "memory"); +} + +#define INVPCID_TYPE_INDIV_ADDR 0 +#define INVPCID_TYPE_SINGLE_CTXT 1 +#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 +#define INVPCID_TYPE_ALL_NON_GLOBAL 3 + +/* Flush all mappings for a given pcid and addr, not including globals. */ +static inline void invpcid_flush_one(unsigned long pcid, + unsigned long addr) +{ + __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); +} + +/* Flush all mappings for a given PCID, not including globals. */ +static inline void invpcid_flush_single_context(unsigned long pcid) +{ + __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); +} + +/* Flush all mappings, including globals, for all PCIDs. */ +static inline void invpcid_flush_all(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL); +} + +/* Flush all mappings for all PCIDs except globals. */ +static inline void invpcid_flush_all_nonglobals(void) +{ + __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL); +} + +#endif /* _ASM_X86_INVPCID */ diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index 139feef467f7e298c6f9db57c43facc64f5468b6..c066ffae222b769996b78c13ee5d063dc7c5c544 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); extern int mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early); + struct irq_data *irq_data, bool reserve); extern void mp_irqdomain_deactivate(struct irq_domain *domain, struct irq_data *irq_data); extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain); diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index c8ef23f2c28f17c59308b9c41179c47f85e075ad..89f08955fff733c688a5ce4f4a0b8d74050ee617 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void) swapgs; \ sysretl +#ifdef CONFIG_DEBUG_ENTRY +#define SAVE_FLAGS(x) pushfq; popq %rax +#endif #else #define INTERRUPT_RETURN iret #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index f86a8caa561e8873c3f34e6e8b8cd509ebadd819..395c9631e000a3a17aa574c1b25fcc2cafd5b5fb 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); extern void __show_regs(struct pt_regs *regs, int all); +extern void show_iret_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h deleted file mode 100644 index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000 --- a/arch/x86/include/asm/kmemcheck.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 034caa1a084e360ff74c77e84116bb0de6e28dcd..b24b1c8b397989304ec88d9979c66b1b5415f78e 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -214,8 +214,6 @@ struct x86_emulate_ops { void (*halt)(struct x86_emulate_ctxt *ctxt); void (*wbinvd)(struct x86_emulate_ctxt *ctxt); int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt); - void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */ - void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */ int (*intercept)(struct x86_emulate_ctxt *ctxt, struct x86_instruction_info *info, enum x86_intercept_stage stage); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 977de5fb968be412862de87aeb95136bae69e620..51679843132829e38ed204eda60d50379a4fabb5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -536,7 +536,20 @@ struct kvm_vcpu_arch { struct kvm_mmu_memory_cache mmu_page_cache; struct kvm_mmu_memory_cache mmu_page_header_cache; + /* + * QEMU userspace and the guest each have their own FPU state. + * In vcpu_run, we switch between the user and guest FPU contexts. + * While running a VCPU, the VCPU thread will have the guest FPU + * context. + * + * Note that while the PKRU state lives inside the fpu registers, + * it is switched out separately at VMENTER and VMEXIT time. The + * "guest_fpu" state here contains the guest FPU context, with the + * host PRKU bits. + */ + struct fpu user_fpu; struct fpu guest_fpu; + u64 xcr0; u64 guest_supported_xcr0; u32 guest_xstate_size; @@ -1435,4 +1448,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define put_smstate(type, buf, offset, val) \ *(type *)((buf) + (offset) - 0x7e00) = val +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 9ea26f16749706fddd5b15e8bf557a9e6156e165..5ff3e8af2c2056b7fe19560ee2ba1ad7146aaf2a 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -3,6 +3,7 @@ #define _ASM_X86_MMU_H #include +#include #include #include @@ -27,7 +28,8 @@ typedef struct { atomic64_t tlb_gen; #ifdef CONFIG_MODIFY_LDT_SYSCALL - struct ldt_struct *ldt; + struct rw_semaphore ldt_usr_sem; + struct ldt_struct *ldt; #endif #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 6d16d15d09a0daed96a1e3d670b6203d1779b98e..c931b88982a0ff59e3b67947cc606e452f327dc0 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -50,22 +50,53 @@ struct ldt_struct { * call gates. On native, we could merge the ldt_struct and LDT * allocations, but it's not worth trying to optimize. */ - struct desc_struct *entries; - unsigned int nr_entries; + struct desc_struct *entries; + unsigned int nr_entries; + + /* + * If PTI is in use, then the entries array is not mapped while we're + * in user mode. The whole array will be aliased at the addressed + * given by ldt_slot_va(slot). We use two slots so that we can allocate + * and map, and enable a new LDT without invalidating the mapping + * of an older, still-in-use LDT. + * + * slot will be -1 if this LDT doesn't have an alias mapping. + */ + int slot; }; +/* This is a multiple of PAGE_SIZE. */ +#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) + +static inline void *ldt_slot_va(int slot) +{ +#ifdef CONFIG_X86_64 + return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); +#else + BUG(); +#endif +} + /* * Used for LDT copy/destruction. */ -int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm); +static inline void init_new_context_ldt(struct mm_struct *mm) +{ + mm->context.ldt = NULL; + init_rwsem(&mm->context.ldt_usr_sem); +} +int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); void destroy_context_ldt(struct mm_struct *mm); +void ldt_arch_exit_mmap(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ -static inline int init_new_context_ldt(struct task_struct *tsk, - struct mm_struct *mm) +static inline void init_new_context_ldt(struct mm_struct *mm) { } +static inline int ldt_dup_context(struct mm_struct *oldmm, + struct mm_struct *mm) { return 0; } -static inline void destroy_context_ldt(struct mm_struct *mm) {} +static inline void destroy_context_ldt(struct mm_struct *mm) { } +static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif static inline void load_mm_ldt(struct mm_struct *mm) @@ -90,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm) * that we can see. */ - if (unlikely(ldt)) - set_ldt(ldt->entries, ldt->nr_entries); - else + if (unlikely(ldt)) { + if (static_cpu_has(X86_FEATURE_PTI)) { + if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { + /* + * Whoops -- either the new LDT isn't mapped + * (if slot == -1) or is mapped into a bogus + * slot (if slot > 1). + */ + clear_LDT(); + return; + } + + /* + * If page table isolation is enabled, ldt->entries + * will not be mapped in the userspace pagetables. + * Tell the CPU to access the LDT through the alias + * at ldt_slot_va(ldt->slot). + */ + set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); + } else { + set_ldt(ldt->entries, ldt->nr_entries); + } + } else { clear_LDT(); + } #else clear_LDT(); #endif @@ -132,18 +184,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + mutex_init(&mm->context.lock); + mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); - #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { /* pkey 0 is the default and always allocated */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; } - #endif - return init_new_context_ldt(tsk, mm); +#endif + init_new_context_ldt(mm); + return 0; } static inline void destroy_context(struct mm_struct *mm) { @@ -176,15 +231,16 @@ do { \ } while (0) #endif -static inline void arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { paravirt_arch_dup_mmap(oldmm, mm); + return ldt_dup_context(oldmm, mm); } static inline void arch_exit_mmap(struct mm_struct *mm) { paravirt_arch_exit_mmap(mm); + ldt_arch_exit_mmap(mm); } #ifdef CONFIG_X86_64 @@ -281,33 +337,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, return __pkru_allows_pkey(vma_pkey(vma), write); } -/* - * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID - * bits. This serves two purposes. It prevents a nasty situation in - * which PCID-unaware code saves CR3, loads some other value (with PCID - * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if - * the saved ASID was nonzero. It also means that any bugs involving - * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger - * deterministically. - */ - -static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid) -{ - if (static_cpu_has(X86_FEATURE_PCID)) { - VM_WARN_ON_ONCE(asid > 4094); - return __sme_pa(mm->pgd) | (asid + 1); - } else { - VM_WARN_ON_ONCE(asid != 0); - return __sme_pa(mm->pgd); - } -} - -static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid) -{ - VM_WARN_ON_ONCE(asid > 4094); - return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH; -} - /* * This can be used from process context to figure out what the value of * CR3 is without needing to do a (slow) __read_cr3(). @@ -317,7 +346,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid) */ static inline unsigned long __get_current_cr3_fast(void) { - unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm), + unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, this_cpu_read(cpu_tlbstate.loaded_mm_asid)); /* For now, be very restrictive about when this can be called. */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 5400add2885b9646fa7d8d53c5b35e16c2794c7d..8bf450b13d9f53883db9ee89ac6d39913963907e 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -7,6 +7,7 @@ #include #include #include +#include /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent @@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) return U64_MAX; __asm__ __volatile__("mov %4, %%r8\n" - "call *%5" + CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input_address) - : "r" (output_address), "m" (hv_hypercall_pg) + : "r" (output_address), + THUNK_TARGET(hv_hypercall_pg) : "cc", "memory", "r8", "r9", "r10", "r11"); #else u32 input_address_hi = upper_32_bits(input_address); @@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) if (!hv_hypercall_pg) return U64_MAX; - __asm__ __volatile__("call *%7" + __asm__ __volatile__(CALL_NOSPEC : "=A" (hv_status), "+c" (input_address_lo), ASM_CALL_CONSTRAINT : "A" (control), "b" (input_address_hi), "D"(output_address_hi), "S"(output_address_lo), - "m" (hv_hypercall_pg) + THUNK_TARGET(hv_hypercall_pg) : "cc", "memory"); #endif /* !x86_64 */ return hv_status; @@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) #ifdef CONFIG_X86_64 { - __asm__ __volatile__("call *%4" + __asm__ __volatile__(CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input1) - : "m" (hv_hypercall_pg) + : THUNK_TARGET(hv_hypercall_pg) : "cc", "r8", "r9", "r10", "r11"); } #else @@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) u32 input1_hi = upper_32_bits(input1); u32 input1_lo = lower_32_bits(input1); - __asm__ __volatile__ ("call *%5" + __asm__ __volatile__ (CALL_NOSPEC : "=A"(hv_status), "+c"(input1_lo), ASM_CALL_CONSTRAINT : "A" (control), "b" (input1_hi), - "m" (hv_hypercall_pg) + THUNK_TARGET(hv_hypercall_pg) : "cc", "edi", "esi"); } #endif diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 34c4922bbc3fb5ed95cb0819d7ac6b9cd37a7f41..e7b983a355060a6c5b5db76f1fc18475eb647ae5 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -355,6 +355,9 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c +#define MSR_F10H_DECFG 0xc0011029 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h new file mode 100644 index 0000000000000000000000000000000000000000..402a11c803c38b0bf8de9d5664e5113249427d47 --- /dev/null +++ b/arch/x86/include/asm/nospec-branch.h @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __NOSPEC_BRANCH_H__ +#define __NOSPEC_BRANCH_H__ + +#include +#include +#include + +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * We define a CPP macro such that it can be used from both .S files and + * inline assembly. It's possible to do a .macro and then include that + * from C via asm(".include ") but let's not go there. + */ + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +/* + * Google experimented with loop-unrolling and this turned out to be + * the optimal version — two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. + */ +#define __FILL_RETURN_BUFFER(reg, nr, sp) \ + mov $(nr/2), reg; \ +771: \ + call 772f; \ +773: /* speculation trap */ \ + pause; \ + jmp 773b; \ +772: \ + call 774f; \ +775: /* speculation trap */ \ + pause; \ + jmp 775b; \ +774: \ + dec reg; \ + jnz 771b; \ + add $(BITS_PER_LONG/8) * nr, sp; + +#ifdef __ASSEMBLY__ + +/* + * This should be used immediately before a retpoline alternative. It tells + * objtool where the retpolines are so that it can make sense of the control + * flow by just reading the original instruction(s) and ignoring the + * alternatives. + */ +.macro ANNOTATE_NOSPEC_ALTERNATIVE + .Lannotate_\@: + .pushsection .discard.nospec + .long .Lannotate_\@ - . + .popsection +.endm + +/* + * These are the bare retpoline primitives for indirect jmp and call. + * Do not use these directly; they only exist to make the ALTERNATIVE + * invocation below less ugly. + */ +.macro RETPOLINE_JMP reg:req + call .Ldo_rop_\@ +.Lspec_trap_\@: + pause + jmp .Lspec_trap_\@ +.Ldo_rop_\@: + mov \reg, (%_ASM_SP) + ret +.endm + +/* + * This is a wrapper around RETPOLINE_JMP so the called function in reg + * returns to the instruction after the macro. + */ +.macro RETPOLINE_CALL reg:req + jmp .Ldo_call_\@ +.Ldo_retpoline_jmp_\@: + RETPOLINE_JMP \reg +.Ldo_call_\@: + call .Ldo_retpoline_jmp_\@ +.endm + +/* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 + * attack. + */ +.macro JMP_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE_2 __stringify(jmp *\reg), \ + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD +#else + jmp *\reg +#endif +.endm + +.macro CALL_NOSPEC reg:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE_2 __stringify(call *\reg), \ + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ + __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD +#else + call *\reg +#endif +.endm + + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. + */ +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +#ifdef CONFIG_RETPOLINE + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ + \ftr +.Lskip_rsb_\@: +#endif +.endm + +#else /* __ASSEMBLY__ */ + +#define ANNOTATE_NOSPEC_ALTERNATIVE \ + "999:\n\t" \ + ".pushsection .discard.nospec\n\t" \ + ".long 999b - .\n\t" \ + ".popsection\n\t" + +#if defined(CONFIG_X86_64) && defined(RETPOLINE) + +/* + * Since the inline asm uses the %V modifier which is only in newer GCC, + * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. + */ +# define CALL_NOSPEC \ + ANNOTATE_NOSPEC_ALTERNATIVE \ + ALTERNATIVE( \ + "call *%[thunk_target]\n", \ + "call __x86_indirect_thunk_%V[thunk_target]\n", \ + X86_FEATURE_RETPOLINE) +# define THUNK_TARGET(addr) [thunk_target] "r" (addr) + +#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) +/* + * For i386 we use the original ret-equivalent retpoline, because + * otherwise we'll run out of registers. We don't care about CET + * here, anyway. + */ +# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ + " jmp 904f;\n" \ + " .align 16\n" \ + "901: call 903f;\n" \ + "902: pause;\n" \ + " jmp 902b;\n" \ + " .align 16\n" \ + "903: addl $4, %%esp;\n" \ + " pushl %[thunk_target];\n" \ + " ret;\n" \ + " .align 16\n" \ + "904: call 901b;\n", \ + X86_FEATURE_RETPOLINE) + +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#else /* No retpoline for C / inline asm */ +# define CALL_NOSPEC "call *%[thunk_target]\n" +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif + +/* The Spectre V2 mitigation variants */ +enum spectre_v2_mitigation { + SPECTRE_V2_NONE, + SPECTRE_V2_RETPOLINE_MINIMAL, + SPECTRE_V2_RETPOLINE_MINIMAL_AMD, + SPECTRE_V2_RETPOLINE_GENERIC, + SPECTRE_V2_RETPOLINE_AMD, + SPECTRE_V2_IBRS, +}; + +/* + * On VMEXIT we must ensure that no RSB predictions learned in the guest + * can be followed in the host, by overwriting the RSB completely. Both + * retpoline and IBRS mitigations for Spectre v2 need this; only on future + * CPUs with IBRS_ATT *might* it be avoided. + */ +static inline void vmexit_fill_RSB(void) +{ +#ifdef CONFIG_RETPOLINE + unsigned long loops = RSB_CLEAR_LOOPS / 2; + + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE("jmp 910f", + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), + X86_FEATURE_RETPOLINE) + "910:" + : "=&r" (loops), ASM_CALL_CONSTRAINT + : "r" (loops) : "memory" ); +#endif +} +#endif /* __ASSEMBLY__ */ +#endif /* __NOSPEC_BRANCH_H__ */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 283efcaac8aff86f2c004bc23e4b8642cbf3d527..892df375b6155a51f584760efb9f9e77c3f732e8 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -927,6 +927,15 @@ extern void default_banner(void); PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ CLBR_NONE, \ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) + +#ifdef CONFIG_DEBUG_ENTRY +#define SAVE_FLAGS(clobbers) \ + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) +#endif + #endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 7a5d6695abd37e737bfb0e532ec4d86c53256433..eb66fa9cd0fc6187d55dc0c1a8fb30e5e32687a8 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -38,6 +38,7 @@ do { \ #define PCI_NOASSIGN_ROMS 0x80000 #define PCI_ROOT_NO_CRS 0x100000 #define PCI_NOASSIGN_BARS 0x200000 +#define PCI_BIG_ROOT_WINDOW 0x400000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 4b5e1eafada731cdedd48d771d79d55e766a84eb..aff42e1da6ee1591bbec15ce6f0543953789d19c 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {} */ extern gfp_t __userpte_alloc_gfp; +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Instead of one PGD, we acquire two PGDs. Being order-1, it is + * both 8k in size and 8k-aligned. That lets us just flip bit 12 + * in a pointer to swap between the two 4k halves. + */ +#define PGD_ALLOCATION_ORDER 1 +#else +#define PGD_ALLOCATION_ORDER 0 +#endif + /* * Allocate and free page tables. */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 95e2dfd755218ccfaf6417b44c822b545a35568e..e42b8943cb1a311a00ddceb36129ede3012489ef 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user); void ptdump_walk_pgd_level_checkwx(void); #ifdef CONFIG_DEBUG_WX @@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) static inline int p4d_bad(p4d_t p4d) { - return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; + unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER; + + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + ignore_flags |= _PAGE_NX; + + return (p4d_flags(p4d) & ~ignore_flags) != 0; } #endif /* CONFIG_PGTABLE_LEVELS > 3 */ @@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) static inline int pgd_bad(pgd_t pgd) { - return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; + unsigned long ignore_flags = _PAGE_USER; + + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + ignore_flags |= _PAGE_NX; + + return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) @@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd) * pgd_offset() returns a (pgd_t *) * pgd_index() is used get the offset into the pgd page's array of pgd_t's; */ -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) +#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) +/* + * a shortcut to get a pgd_t in a given mm + */ +#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) /* * a shortcut which implies the use of the kernel's pgd, instead * of a process's @@ -1106,7 +1121,14 @@ static inline int pud_write(pud_t pud) */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { - memcpy(dst, src, count * sizeof(pgd_t)); + memcpy(dst, src, count * sizeof(pgd_t)); +#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + /* Clone the user space pgd as well */ + memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), + count * sizeof(pgd_t)); +#endif } #define PTE_SHIFT ilog2(PTRS_PER_PTE) diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index f2ca9b28fd68303f4494775564aa9da77ddcd53a..ce245b0cdfcaa42bd932a387bbb189ee7349bfef 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ #define LAST_PKMAP 1024 #endif -#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ - & PMD_MASK) +/* + * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c + * to avoid include recursion hell + */ +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) + +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK) + +#define PKMAP_BASE \ + ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) #else -# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) +# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE) #endif #define MODULES_VADDR VMALLOC_START diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index e9f05331e732a057b341e78977d666b8cfe35289..81462e9a34f6af49645a08f55c7d67e0144dbb77 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp) #endif } +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages + * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and + * the user one is in the last 4k. To switch between them, you + * just need to flip the 12th bit in their addresses. + */ +#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT + +/* + * This generates better code than the inline assembly in + * __set_bit(). + */ +static inline void *ptr_set_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr |= BIT(bit); + return (void *)__ptr; +} +static inline void *ptr_clear_bit(void *ptr, int bit) +{ + unsigned long __ptr = (unsigned long)ptr; + + __ptr &= ~BIT(bit); + return (void *)__ptr; +} + +static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) +{ + return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) +{ + return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) +{ + return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} + +static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) +{ + return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); +} +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ + +/* + * Page table pages are page-aligned. The lower half of the top + * level is used for userspace and the top half for the kernel. + * + * Returns true for parts of the PGD that map userspace and + * false for the parts that map the kernel. + */ +static inline bool pgdp_maps_userspace(void *__ptr) +{ + unsigned long ptr = (unsigned long)__ptr; + + return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2); +} + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd); + +/* + * Take a PGD location (pgdp) and a pgd value that needs to be set there. + * Populates the user and returns the resulting PGD that must be set in + * the kernel copy of the page tables. + */ +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + return pgd; + return __pti_set_user_pgd(pgdp, pgd); +} +#else +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + return pgd; +} +#endif + static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { +#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL) + p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd); +#else *p4dp = p4d; +#endif } static inline void native_p4d_clear(p4d_t *p4d) @@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { +#ifdef CONFIG_PAGE_TABLE_ISOLATION + *pgdp = pti_set_user_pgd(pgdp, pgd); +#else *pgdp = pgd; +#endif } static inline void native_pgd_clear(pgd_t *pgd) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6d5f45dcd4a13caafbf184f323d0725c2d5f53e4..6b8f73dcbc2c2b6a835e17ac0828d8b6484f4dce 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -75,33 +75,52 @@ typedef struct { pteval_t pte; } pte_t; #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) -/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +/* + * See Documentation/x86/x86_64/mm.txt for a description of the memory map. + * + * Be very careful vs. KASLR when changing anything here. The KASLR address + * range must not overlap with anything except the KASAN shadow area, which + * is correct as KASAN disables KASLR. + */ +#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) + #ifdef CONFIG_X86_5LEVEL -#define VMALLOC_SIZE_TB _AC(16384, UL) -#define __VMALLOC_BASE _AC(0xff92000000000000, UL) -#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) +# define VMALLOC_SIZE_TB _AC(12800, UL) +# define __VMALLOC_BASE _AC(0xffa0000000000000, UL) +# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) +# define LDT_PGD_ENTRY _AC(-112, UL) +# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #else -#define VMALLOC_SIZE_TB _AC(32, UL) -#define __VMALLOC_BASE _AC(0xffffc90000000000, UL) -#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) +# define VMALLOC_SIZE_TB _AC(32, UL) +# define __VMALLOC_BASE _AC(0xffffc90000000000, UL) +# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) +# define LDT_PGD_ENTRY _AC(-3, UL) +# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #endif + #ifdef CONFIG_RANDOMIZE_MEMORY -#define VMALLOC_START vmalloc_base -#define VMEMMAP_START vmemmap_base +# define VMALLOC_START vmalloc_base +# define VMEMMAP_START vmemmap_base #else -#define VMALLOC_START __VMALLOC_BASE -#define VMEMMAP_START __VMEMMAP_BASE +# define VMALLOC_START __VMALLOC_BASE +# define VMEMMAP_START __VMEMMAP_BASE #endif /* CONFIG_RANDOMIZE_MEMORY */ -#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) -#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) + +#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) + +#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ -#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1) -#define MODULES_LEN (MODULES_END - MODULES_VADDR) -#define ESPFIX_PGD_ENTRY _AC(-2, UL) -#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) -#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) -#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) +#define MODULES_END _AC(0xffffffffff000000, UL) +#define MODULES_LEN (MODULES_END - MODULES_VADDR) + +#define ESPFIX_PGD_ENTRY _AC(-2, UL) +#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT) + +#define CPU_ENTRY_AREA_PGD _AC(-4, UL) +#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT) + +#define EFI_VA_START ( -4 * (_AC(1, UL) << 30)) +#define EFI_VA_END (-68 * (_AC(1, UL) << 30)) #define EARLY_DYNAMIC_PAGE_TABLES 64 diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 43212a43ee69feea1de27275d9075c566cdfcd2c..625a52a5594f53b3ddc889843b92d228cc39a8e8 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -38,6 +38,11 @@ #define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull) #define CR3_PCID_MASK 0xFFFull #define CR3_NOFLUSH BIT_ULL(63) + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define X86_CR3_PTI_PCID_USER_BIT 11 +#endif + #else /* * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cc16fa882e3e760a40351cf3e7476ac9f25ffe00..d3a67fba200ae2a5c03f52a7815dca00b43c63ad 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -163,9 +163,9 @@ enum cpuid_regs_idx { extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; -extern struct tss_struct doublefault_tss; -extern __u32 cpu_caps_cleared[NCAPINTS]; -extern __u32 cpu_caps_set[NCAPINTS]; +extern struct x86_hw_tss doublefault_tss; +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); @@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir) write_cr3(__sme_pa(pgdir)); } +/* + * Note that while the legacy 'TSS' name comes from 'Task State Segment', + * on modern x86 CPUs the TSS also holds information important to 64-bit mode, + * unrelated to the task-switch mechanism: + */ #ifdef CONFIG_X86_32 /* This is the TSS defined by the hardware. */ struct x86_hw_tss { @@ -305,7 +310,13 @@ struct x86_hw_tss { struct x86_hw_tss { u32 reserved1; u64 sp0; + + /* + * We store cpu_current_top_of_stack in sp1 so it's always accessible. + * Linux does not use ring 1, so sp1 is not otherwise needed. + */ u64 sp1; + u64 sp2; u64 reserved2; u64 ist[7]; @@ -323,12 +334,22 @@ struct x86_hw_tss { #define IO_BITMAP_BITS 65536 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) -#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) +#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) #define INVALID_IO_BITMAP_OFFSET 0x8000 +struct entry_stack { + unsigned long words[64]; +}; + +struct entry_stack_page { + struct entry_stack stack; +} __aligned(PAGE_SIZE); + struct tss_struct { /* - * The hardware state: + * The fixed hardware portion. This must not cross a page boundary + * at risk of violating the SDM's advice and potentially triggering + * errata. */ struct x86_hw_tss x86_tss; @@ -339,18 +360,9 @@ struct tss_struct { * be within the limit. */ unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; +} __aligned(PAGE_SIZE); -#ifdef CONFIG_X86_32 - /* - * Space for the temporary SYSENTER stack. - */ - unsigned long SYSENTER_stack_canary; - unsigned long SYSENTER_stack[64]; -#endif - -} ____cacheline_aligned; - -DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); +DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); /* * sizeof(unsigned long) coming from an extra "long" at the end @@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); #ifdef CONFIG_X86_32 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); +#else +/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */ +#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 #endif /* @@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask) static inline void native_load_sp0(unsigned long sp0) { - this_cpu_write(cpu_tss.x86_tss.sp0, sp0); + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } static inline void native_swapgs(void) @@ -535,12 +550,12 @@ static inline void native_swapgs(void) static inline unsigned long current_top_of_stack(void) { -#ifdef CONFIG_X86_64 - return this_cpu_read_stable(cpu_tss.x86_tss.sp0); -#else - /* sp0 on x86_32 is special in and around vm86 mode. */ + /* + * We can't read directly from tss.sp0: sp0 on x86_32 is special in + * and around vm86 mode and sp0 on x86_64 is special because of the + * entry trampoline. + */ return this_cpu_read_stable(cpu_current_top_of_stack); -#endif } static inline bool on_thread_stack(void) @@ -837,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x) #else /* - * User space process size. 47bits minus one guard page. The guard - * page is necessary on Intel CPUs: if a SYSCALL instruction is at - * the highest possible canonical userspace address, then that - * syscall will enter the kernel with a non-canonical return - * address, and SYSRET will explode dangerously. We avoid this - * particular problem by preventing anything from being mapped - * at the maximum canonical address. + * User space process size. This is the first address outside the user range. + * There are a few constraints that determine this: + * + * On Intel CPUs, if a SYSCALL instruction is at the highest canonical + * address, then that syscall will enter the kernel with a + * non-canonical return address, and SYSRET will explode dangerously. + * We avoid this particular problem by preventing anything executable + * from being mapped at the maximum canonical address. + * + * On AMD CPUs in the Ryzen family, there's a nasty bug in which the + * CPUs malfunction if they execute code from the highest canonical page. + * They'll speculate right off the end of the canonical space, and + * bad things happen. This is worked around in the same way as the + * Intel problem. + * + * With page table isolation enabled, we map the LDT in ... [stay tuned] */ #define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE) diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h new file mode 100644 index 0000000000000000000000000000000000000000..0b5ef05b2d2d9f03a967afe10256a266ef242c98 --- /dev/null +++ b/arch/x86/include/asm/pti.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _ASM_X86_PTI_H +#define _ASM_X86_PTI_H +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +extern void pti_init(void); +extern void pti_check_boottime_disable(void); +#else +static inline void pti_check_boottime_disable(void) { } +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_PTI_H */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index b20f9d623f9c639fd212c5e313bd5e450747b1eb..8f09012b92e779d7aabf4ad663b8eb10b2379c37 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -236,11 +236,23 @@ */ #define EARLY_IDT_HANDLER_SIZE 9 +/* + * xen_early_idt_handler_array is for Xen pv guests: for each entry in + * early_idt_handler_array it contains a prequel in the form of + * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to + * max 8 bytes. + */ +#define XEN_EARLY_IDT_HANDLER_SIZE 8 + #ifndef __ASSEMBLY__ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; extern void early_ignore_irq(void); +#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) +extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE]; +#endif + /* * Load a segment. Fall back on loading the zero segment if something goes * wrong. This variant assumes that loading zero fully clears the segment. diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 8da111b3c342bbb61a9e630e101c8a83422a15ea..f737068787729f045a578776845231b0a0ee3e0d 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -16,6 +16,7 @@ enum stack_type { STACK_TYPE_TASK, STACK_TYPE_IRQ, STACK_TYPE_SOFTIRQ, + STACK_TYPE_ENTRY, STACK_TYPE_EXCEPTION, STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, }; @@ -28,6 +29,8 @@ struct stack_info { bool in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info); +bool in_entry_stack(unsigned long *stack, struct stack_info *info); + int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask); diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 982c325dad3377a4f0d80a5808f4d731a87926a5..8be6afb584715dc8d5a50d1bbd989bf053366294 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -12,7 +12,13 @@ /* image of the saved processor state */ struct saved_context { - u16 es, fs, gs, ss; + /* + * On x86_32, all segment registers, with the possible exception of + * gs, are saved at kernel entry in pt_regs. + */ +#ifdef CONFIG_X86_32_LAZY_GS + u16 gs; +#endif unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; bool misc_enable_saved; diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 7306e911faee20694908bdf482166546776a7e92..a7af9f53c0cb773d05fd84ebe94525a1971e3ebb 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -20,8 +20,20 @@ */ struct saved_context { struct pt_regs regs; - u16 ds, es, fs, gs, ss; - unsigned long gs_base, gs_kernel_base, fs_base; + + /* + * User CS and SS are saved in current_pt_regs(). The rest of the + * segment selectors need to be saved and restored here. + */ + u16 ds, es, fs, gs; + + /* + * Usermode FSBASE and GSBASE may not match the fs and gs selectors, + * so we save them separately. We save the kernelmode GSBASE to + * restore percpu access after resume. + */ + unsigned long kernelmode_gs_base, usermode_gs_base, fs_base; + unsigned long cr0, cr2, cr3, cr4, cr8; u64 misc_enable; bool misc_enable_saved; @@ -30,8 +42,7 @@ struct saved_context { u16 gdt_pad; /* Unused */ struct desc_ptr gdt_desc; u16 idt_pad; - u16 idt_limit; - unsigned long idt_base; + struct desc_ptr idt; u16 ldt; u16 tss; unsigned long tr; diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 8c6bd6863db9d6b737cd0649324c154f9b9798a3..eb5f7999a8934c7af8b6c5b170b05e5772264783 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -16,8 +16,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); /* This runs runs on the previous thread's stack. */ -static inline void prepare_switch_to(struct task_struct *prev, - struct task_struct *next) +static inline void prepare_switch_to(struct task_struct *next) { #ifdef CONFIG_VMAP_STACK /* @@ -70,7 +69,7 @@ struct fork_frame { #define switch_to(prev, next, last) \ do { \ - prepare_switch_to(prev, next); \ + prepare_switch_to(next); \ \ ((last) = __switch_to_asm((prev), (next))); \ } while (0) @@ -79,10 +78,10 @@ do { \ static inline void refresh_sysenter_cs(struct thread_struct *thread) { /* Only happens when SEP is enabled, no need to test "SEP"arately: */ - if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs)) + if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs)) return; - this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs); + this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs); wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); } #endif @@ -90,10 +89,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread) /* This is used when switching tasks or entering/exiting vm86 mode. */ static inline void update_sp0(struct task_struct *task) { + /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */ #ifdef CONFIG_X86_32 load_sp0(task->thread.sp0); #else - load_sp0(task_top_of_stack(task)); + if (static_cpu_has(X86_FEATURE_XENPV)) + load_sp0(task_top_of_stack(task)); #endif } diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 70f425947dc50f3e99ca639c0ead0d7e1cce636d..00223333821a96616647a9cbb6fe729c4a18b7b6 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack, #else /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_64 -# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) +# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1) #endif #endif diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 509046cfa5ce893357366348468a5c5ff8e86a09..d33e4a26dc7ed2db51687ddb4ca7ee3a35ddf835 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -9,70 +9,130 @@ #include #include #include +#include +#include +#include -static inline void __invpcid(unsigned long pcid, unsigned long addr, - unsigned long type) -{ - struct { u64 d[2]; } desc = { { pcid, addr } }; +/* + * The x86 feature is called PCID (Process Context IDentifier). It is similar + * to what is traditionally called ASID on the RISC processors. + * + * We don't use the traditional ASID implementation, where each process/mm gets + * its own ASID and flush/restart when we run out of ASID space. + * + * Instead we have a small per-cpu array of ASIDs and cache the last few mm's + * that came by on this CPU, allowing cheaper switch_mm between processes on + * this CPU. + * + * We end up with different spaces for different things. To avoid confusion we + * use different names for each of them: + * + * ASID - [0, TLB_NR_DYN_ASIDS-1] + * the canonical identifier for an mm + * + * kPCID - [1, TLB_NR_DYN_ASIDS] + * the value we write into the PCID part of CR3; corresponds to the + * ASID+1, because PCID 0 is special. + * + * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS] + * for KPTI each mm has two address spaces and thus needs two + * PCID values, but we can still do with a single ASID denomination + * for each mm. Corresponds to kPCID + 2048. + * + */ - /* - * The memory clobber is because the whole point is to invalidate - * stale TLB entries and, especially if we're flushing global - * mappings, we don't want the compiler to reorder any subsequent - * memory accesses before the TLB flush. - * - * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and - * invpcid (%rcx), %rax in long mode. - */ - asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" - : : "m" (desc), "a" (type), "c" (&desc) : "memory"); -} +/* There are 12 bits of space for ASIDS in CR3 */ +#define CR3_HW_ASID_BITS 12 -#define INVPCID_TYPE_INDIV_ADDR 0 -#define INVPCID_TYPE_SINGLE_CTXT 1 -#define INVPCID_TYPE_ALL_INCL_GLOBAL 2 -#define INVPCID_TYPE_ALL_NON_GLOBAL 3 +/* + * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for + * user/kernel switches + */ +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define PTI_CONSUMED_PCID_BITS 1 +#else +# define PTI_CONSUMED_PCID_BITS 0 +#endif -/* Flush all mappings for a given pcid and addr, not including globals. */ -static inline void invpcid_flush_one(unsigned long pcid, - unsigned long addr) -{ - __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR); -} +#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS) + +/* + * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account + * for them being zero-based. Another -1 is because PCID 0 is reserved for + * use by non-PCID-aware users. + */ +#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2) -/* Flush all mappings for a given PCID, not including globals. */ -static inline void invpcid_flush_single_context(unsigned long pcid) +/* + * 6 because 6 should be plenty and struct tlb_state will fit in two cache + * lines. + */ +#define TLB_NR_DYN_ASIDS 6 + +/* + * Given @asid, compute kPCID + */ +static inline u16 kern_pcid(u16 asid) { - __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT); + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * Make sure that the dynamic ASID space does not confict with the + * bit we are using to switch between user and kernel ASIDs. + */ + BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT)); + + /* + * The ASID being passed in here should have respected the + * MAX_ASID_AVAILABLE and thus never have the switch bit set. + */ + VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT)); +#endif + /* + * The dynamically-assigned ASIDs that get passed in are small + * (context.tlb_gen); - smp_mb__after_atomic(); - - return new_tlb_gen; + VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); + VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID)); + return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; } #ifdef CONFIG_PARAVIRT @@ -99,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void) return !static_cpu_has(X86_FEATURE_PCID); } -/* - * 6 because 6 should be plenty and struct tlb_state will fit in - * two cache lines. - */ -#define TLB_NR_DYN_ASIDS 6 - struct tlb_context { u64 ctx_id; u64 tlb_gen; @@ -138,6 +192,24 @@ struct tlb_state { */ bool is_lazy; + /* + * If set we changed the page tables in such a way that we + * needed an invalidation of all contexts (aka. PCIDs / ASIDs). + * This tells us to go invalidate all the non-loaded ctxs[] + * on the next context switch. + * + * The current ctx was kept up-to-date as it ran and does not + * need to be invalidated. + */ + bool invalidate_other; + + /* + * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate + * the corresponding user PCID needs a flush next time we + * switch to it; see SWITCH_TO_USER_CR3. + */ + unsigned short user_pcid_flush_mask; + /* * Access to this CR4 shadow and to H/W CR4 is protected by * disabling interrupts when modifying either one. @@ -173,40 +245,43 @@ static inline void cr4_init_shadow(void) this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); } +static inline void __cr4_set(unsigned long cr4) +{ + lockdep_assert_irqs_disabled(); + this_cpu_write(cpu_tlbstate.cr4, cr4); + __write_cr4(cr4); +} + /* Set in this cpu's CR4. */ static inline void cr4_set_bits(unsigned long mask) { - unsigned long cr4; + unsigned long cr4, flags; + local_irq_save(flags); cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 | mask) != cr4) { - cr4 |= mask; - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); - } + if ((cr4 | mask) != cr4) + __cr4_set(cr4 | mask); + local_irq_restore(flags); } /* Clear in this cpu's CR4. */ static inline void cr4_clear_bits(unsigned long mask) { - unsigned long cr4; + unsigned long cr4, flags; + local_irq_save(flags); cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 & ~mask) != cr4) { - cr4 &= ~mask; - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); - } + if ((cr4 & ~mask) != cr4) + __cr4_set(cr4 & ~mask); + local_irq_restore(flags); } -static inline void cr4_toggle_bits(unsigned long mask) +static inline void cr4_toggle_bits_irqsoff(unsigned long mask) { unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); - cr4 ^= mask; - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); + __cr4_set(cr4 ^ mask); } /* Read the CR4 shadow. */ @@ -215,6 +290,14 @@ static inline unsigned long cr4_read_shadow(void) return this_cpu_read(cpu_tlbstate.cr4); } +/* + * Mark all other ASIDs as invalid, preserves the current. + */ +static inline void invalidate_other_asid(void) +{ + this_cpu_write(cpu_tlbstate.invalidate_other, true); +} + /* * Save some of cr4 feature set we're using (e.g. Pentium 4MB * enable and PPro Global page enable), so that any CPU's that boot @@ -234,37 +317,63 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask) extern void initialize_tlbstate_and_flush(void); -static inline void __native_flush_tlb(void) +/* + * Given an ASID, flush the corresponding user ASID. We can delay this + * until the next time we switch to it. + * + * See SWITCH_TO_USER_CR3. + */ +static inline void invalidate_user_asid(u16 asid) { + /* There is no user ASID if address space separation is off */ + if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) + return; + /* - * If current->mm == NULL then we borrow a mm which may change during a - * task switch and therefore we must not be preempted while we write CR3 - * back: + * We only have a single ASID if PCID is off and the CR3 + * write will have flushed it. */ - preempt_disable(); - native_write_cr3(__native_read_cr3()); - preempt_enable(); + if (!cpu_feature_enabled(X86_FEATURE_PCID)) + return; + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + __set_bit(kern_pcid(asid), + (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); } -static inline void __native_flush_tlb_global_irq_disabled(void) +/* + * flush the entire current user mapping + */ +static inline void __native_flush_tlb(void) { - unsigned long cr4; + /* + * Preemption or interrupts must be disabled to protect the access + * to the per CPU variable and to prevent being preempted between + * read_cr3() and write_cr3(). + */ + WARN_ON_ONCE(preemptible()); - cr4 = this_cpu_read(cpu_tlbstate.cr4); - /* clear PGE */ - native_write_cr4(cr4 & ~X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); + invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); + + /* If current->mm == NULL then the read_cr3() "borrows" an mm */ + native_write_cr3(__native_read_cr3()); } +/* + * flush everything + */ static inline void __native_flush_tlb_global(void) { - unsigned long flags; + unsigned long cr4, flags; if (static_cpu_has(X86_FEATURE_INVPCID)) { /* * Using INVPCID is considerably faster than a pair of writes * to CR4 sandwiched inside an IRQ flag save/restore. + * + * Note, this works with CR4.PCIDE=0 or 1. */ invpcid_flush_all(); return; @@ -277,36 +386,69 @@ static inline void __native_flush_tlb_global(void) */ raw_local_irq_save(flags); - __native_flush_tlb_global_irq_disabled(); + cr4 = this_cpu_read(cpu_tlbstate.cr4); + /* toggle PGE */ + native_write_cr4(cr4 ^ X86_CR4_PGE); + /* write old PGE again and flush TLBs */ + native_write_cr4(cr4); raw_local_irq_restore(flags); } +/* + * flush one page in the user mapping + */ static inline void __native_flush_tlb_single(unsigned long addr) { + u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); + asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + /* + * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1. + * Just use invalidate_user_asid() in case we are called early. + */ + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) + invalidate_user_asid(loaded_mm_asid); + else + invpcid_flush_one(user_pcid(loaded_mm_asid), addr); } +/* + * flush everything + */ static inline void __flush_tlb_all(void) { - if (boot_cpu_has(X86_FEATURE_PGE)) + if (boot_cpu_has(X86_FEATURE_PGE)) { __flush_tlb_global(); - else + } else { + /* + * !PGE -> !PCID (setup_pcid()), thus every flush is total. + */ __flush_tlb(); - - /* - * Note: if we somehow had PCID but not PGE, then this wouldn't work -- - * we'd end up flushing kernel translations for the current ASID but - * we might fail to flush kernel translations for other cached ASIDs. - * - * To avoid this issue, we force PCID off if PGE is off. - */ + } } +/* + * flush one page in the kernel mapping + */ static inline void __flush_tlb_one(unsigned long addr) { count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + /* + * __flush_tlb_single() will have cleared the TLB entry for this ASID, + * but since kernel space is replicated across all, we must also + * invalidate all others. + */ + invalidate_other_asid(); } #define TLB_FLUSH_ALL -1UL @@ -367,6 +509,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) void native_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info); +static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) +{ + /* + * Bump the generation count. This also serves as a full barrier + * that synchronizes with switch_mm(): callers are required to order + * their read of mm_cpumask after their writes to the paging + * structures. + */ + return atomic64_inc_return(&mm->context.tlb_gen); +} + static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, struct mm_struct *mm) { diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 84b9ec0c1bc0867795c3a2d327f3b8831bf4f8ba..22647a642e98c41508dab9976a1ff4665b97a4d2 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed, DECLARE_EVENT_CLASS(vector_activate, TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve, - bool early), + bool reserve), - TP_ARGS(irq, is_managed, can_reserve, early), + TP_ARGS(irq, is_managed, can_reserve, reserve), TP_STRUCT__entry( __field( unsigned int, irq ) __field( bool, is_managed ) __field( bool, can_reserve ) - __field( bool, early ) + __field( bool, reserve ) ), TP_fast_assign( __entry->irq = irq; __entry->is_managed = is_managed; __entry->can_reserve = can_reserve; - __entry->early = early; + __entry->reserve = reserve; ), - TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d", + TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d", __entry->irq, __entry->is_managed, __entry->can_reserve, - __entry->early) + __entry->reserve) ); #define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \ DEFINE_EVENT_FN(vector_activate, name, \ TP_PROTO(unsigned int irq, bool is_managed, \ - bool can_reserve, bool early), \ - TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \ + bool can_reserve, bool reserve), \ + TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL); \ DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate); DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate); diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 1fadd310ff680ece697fa65a8db410c380a8547e..31051f35cbb768e452c4f76a60c5415a45f572e7 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long); dotraplinkage void do_stack_segment(struct pt_regs *, long); #ifdef CONFIG_X86_64 dotraplinkage void do_double_fault(struct pt_regs *, long); -asmlinkage struct pt_regs *sync_regs(struct pt_regs *); #endif dotraplinkage void do_general_protection(struct pt_regs *, long); dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index e9cc6fe1fc6f953c38ddcc61fcf06fd90d72ab04..1f86e1b0a5cdc1afeb4667e8c770ec81456e9fb4 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -7,6 +7,9 @@ #include #include +#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip)) +#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET) + struct unwind_state { struct stack_info stack_info; unsigned long stack_mask; @@ -52,15 +55,28 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, } #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) -static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) +/* + * If 'partial' returns true, only the iret frame registers are valid. + */ +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, + bool *partial) { if (unwind_done(state)) return NULL; + if (partial) { +#ifdef CONFIG_UNWINDER_ORC + *partial = !state->full_regs; +#else + *partial = false; +#endif + } + return state->regs; } #else -static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) +static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, + bool *partial) { return NULL; } diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d9a7c659009c94b2ee04b4ddcc94e5874cc403b0..b986b2ca688a0e4fa24ace613c7e465e909723fe 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -7,6 +7,7 @@ #ifdef CONFIG_X86_VSYSCALL_EMULATION extern void map_vsyscall(void); +extern void set_vsyscall_pgtable_user_bits(pgd_t *root); /* * Called on instruction fetch fault in vsyscall page. diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 7cb282e9e58777aeb2ffd86b344d39a5189c5f84..bfd882617613923f0db79c4aae82690bda7a8d4f 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -217,9 +218,9 @@ privcmd_call(unsigned call, __HYPERCALL_5ARG(a1, a2, a3, a4, a5); stac(); - asm volatile("call *%[call]" + asm volatile(CALL_NOSPEC : __HYPERCALL_5PARAM - : [call] "a" (&hypercall_page[call]) + : [thunk_target] "a" (&hypercall_page[call]) : __HYPERCALL_CLOBBER5); clac(); diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild index da1489cb64dce5fcec622f2869cd26d2b58d00ac..1e901e421f2db09d4a0c2c543f47b0e5bbc8be62 100644 --- a/arch/x86/include/uapi/asm/Kbuild +++ b/arch/x86/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h generated-y += unistd_32.h generated-y += unistd_64.h generated-y += unistd_x32.h diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index 7e1e730396ae08f5a267adaccf0c3ba46448f780..bcba3c643e63dced1c873ee5e1cdbfdd5d307928 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -78,7 +78,12 @@ #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT) #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */ #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT) -#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */ + +#define X86_CR3_PCID_BITS 12 +#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL)) + +#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ +#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) /* * Intel CPU features in CR4 diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index dbaf14d69ebd510b86a4a0e9e83bbd1862d99d14..4817d743c26359c697559053a23518a5e25b241b 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -344,9 +344,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) { unsigned long flags; + int i; - if (instr[0] != 0x90) - return; + for (i = 0; i < a->padlen; i++) { + if (instr[i] != 0x90) + return; + } local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 6e272f3ea984a220975d1b7c144867840fb867a9..880441f2414610298002c34652b96e571772e08f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2626,11 +2626,13 @@ static int __init apic_set_verbosity(char *arg) apic_verbosity = APIC_DEBUG; else if (strcmp("verbose", arg) == 0) apic_verbosity = APIC_VERBOSE; +#ifdef CONFIG_X86_64 else { pr_warning("APIC Verbosity level %s not recognised" " use apic=verbose or apic=debug\n", arg); return -EINVAL; } +#endif return 0; } diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index aa85690e9b6416b797db8129c8f1cbd7606bf687..25a87028cb3fe9bb4a6e9eba757d50cd49f15620 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -151,7 +151,7 @@ static struct apic apic_flat __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = flat_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 1, /* logical */ .disable_esr = 0, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 7b659c4480c91a680466cc961cee68754c633ada..5078b5ce63a7aaf4569368e6447c9a4423271109 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -110,7 +110,7 @@ struct apic apic_noop __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = noop_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 201579dc52428edb5c3989102a432c14799d1e1f..8a79634214600ab02076cfb601dac780996b6f3f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq, } int mp_irqdomain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { unsigned long flags; diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 9b18be76442236eab971cab86cbf366f9e1e134f..ce503c99f5c4cd67fddb5fafdda7e3e64b8e7690 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -39,17 +39,13 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) ((apic->irq_dest_mode == 0) ? MSI_ADDR_DEST_MODE_PHYSICAL : MSI_ADDR_DEST_MODE_LOGICAL) | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU : - MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_REDIRECTION_CPU | MSI_ADDR_DEST_ID(cfg->dest_apicid); msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED : - MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_DELIVERY_FIXED | MSI_DATA_VECTOR(cfg->vector); } diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index fa22017de806504b79593ead951bffe4e5d5421b..02e8acb134f856faa0030bf1c007dc3ac1bd9ca3 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -105,7 +105,7 @@ static struct apic apic_default __ro_after_init = { .apic_id_valid = default_apic_id_valid, .apic_id_registered = default_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6a823a25eaff03787660bd1f92e587362259e54d..f8b03bb8e72560a436dbad677ddeed30f19531cf 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd) irq_matrix_reserve(vector_matrix); apicd->can_reserve = true; apicd->has_reserved = true; + irqd_set_can_reserve(irqd); trace_vector_reserve(irqd->irq, 0); vector_assign_managed_shutdown(irqd); } @@ -368,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd) int ret; ret = assign_irq_vector_any_locked(irqd); - if (!ret) + if (!ret) { apicd->has_reserved = false; + /* + * Core might have disabled reservation mode after + * allocating the irq descriptor. Ideally this should + * happen before allocation time, but that would require + * completely convoluted ways of transporting that + * information. + */ + if (!irqd_can_reserve(irqd)) + apicd->can_reserve = false; + } return ret; } @@ -398,21 +409,21 @@ static int activate_managed(struct irq_data *irqd) } static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, - bool early) + bool reserve) { struct apic_chip_data *apicd = apic_chip_data(irqd); unsigned long flags; int ret = 0; trace_vector_activate(irqd->irq, apicd->is_managed, - apicd->can_reserve, early); + apicd->can_reserve, reserve); /* Nothing to do for fixed assigned vectors */ if (!apicd->can_reserve && !apicd->is_managed) return 0; raw_spin_lock_irqsave(&vector_lock, flags); - if (early || irqd_is_managed_and_shutdown(irqd)) + if (reserve || irqd_is_managed_and_shutdown(irqd)) vector_assign_managed_shutdown(irqd); else if (apicd->is_managed) ret = activate_managed(irqd); @@ -478,6 +489,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, } else { /* Release the vector */ apicd->can_reserve = true; + irqd_set_can_reserve(irqd); clear_irq_vector(irqd); realloc = true; } @@ -542,8 +554,8 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, } #ifdef CONFIG_GENERIC_IRQ_DEBUGFS -void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, - struct irq_data *irqd, int ind) +static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind) { unsigned int cpu, vector, prev_cpu, prev_vector; struct apic_chip_data *apicd; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 622f13ca8a943c270e70df6100561ce4c56d7db2..8b04234e010b2616e42bff6cbeabdc3c2087c13c 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -184,7 +184,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .apic_id_valid = x2apic_apic_id_valid, .apic_id_registered = x2apic_apic_id_registered, - .irq_delivery_mode = dest_LowestPrio, + .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 1, /* logical */ .disable_esr = 0, diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 8ea78275480dafeb702e11ba73364cd9e7c52f21..76417a9aab73c3f7e3376261eb9ec592b16adf3b 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_XEN #include @@ -93,4 +94,13 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + + /* TLB state for the entry code */ + OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask); + + /* Layout info for cpu_entry_area */ + OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); + OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); + OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page); + DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack)); } diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index dedf428b20b68b0a4748fc1ac3032193c9121362..fa1261eefa16e73cedf27aadb878753be693f919 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -47,13 +47,8 @@ void foo(void) BLANK(); /* Offset from the sysenter stack to tss.sp0 */ - DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - - offsetofend(struct tss_struct, SYSENTER_stack)); - - /* Offset from cpu_tss to SYSENTER_stack */ - OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); - /* Size of SYSENTER_stack */ - DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); + DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) - + offsetofend(struct cpu_entry_area, entry_stack_page.stack)); #ifdef CONFIG_CC_STACKPROTECTOR BLANK(); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 630212fa9b9da3f0498fc30d4c193c5926c43abb..bf51e51d808dd8914abd3b4bca69b37ce3ec023b 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -23,6 +23,9 @@ int main(void) #ifdef CONFIG_PARAVIRT OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); +#ifdef CONFIG_DEBUG_ENTRY + OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl); +#endif BLANK(); #endif @@ -63,6 +66,7 @@ int main(void) OFFSET(TSS_ist, tss_struct, x86_tss.ist); OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); + OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); BLANK(); #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d58184b7cd4438144e2d0ac3f4744d19ff4ffb31..ea831c85819583c3dd0b0a48c2a31518eaa67b44 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x17: init_amd_zn(c); break; } - /* Enable workaround for FXSAVE leak */ - if (c->x86 >= 6) + /* + * Enable workaround for FXSAVE leak on CPUs + * without a XSaveErPtr feature + */ + if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR))) set_cpu_bug(c, X86_BUG_FXSAVE_LEAK); cpu_detect_cache_sizes(c); @@ -826,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_K8); if (cpu_has(c, X86_FEATURE_XMM2)) { - /* MFENCE stops RDTSC speculation */ - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + unsigned long long val; + int ret; + + /* + * A serializing LFENCE has less overhead than MFENCE, so + * use it for execution serialization. On families which + * don't have that MSR, LFENCE is already serializing. + * msr_set_bit() uses the safe accessors, too, even if the MSR + * is not present. + */ + msr_set_bit(MSR_F10H_DECFG, + MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + + /* + * Verify that the MSR write was successful (could be running + * under a hypervisor) and only then assume that LFENCE is + * serializing. + */ + ret = rdmsrl_safe(MSR_F10H_DECFG, &val); + if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) { + /* A serializing LFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); + } else { + /* MFENCE stops RDTSC speculation */ + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + } } /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index ba0b2424c9b050100ac6eeef97c4aef45181d455..e4dc26185aa70bcdecf8ca932b6c720121819558 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -10,6 +10,10 @@ */ #include #include +#include + +#include +#include #include #include #include @@ -20,6 +24,8 @@ #include #include +static void __init spectre_v2_select_mitigation(void); + void __init check_bugs(void) { identify_boot_cpu(); @@ -29,6 +35,9 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); } + /* Select the proper spectre mitigation before patching alternatives */ + spectre_v2_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -60,3 +69,179 @@ void __init check_bugs(void) set_memory_4k((unsigned long)__va(0), 1); #endif } + +/* The kernel command line selection */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +static const char *spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", + [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", +}; + +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; + +static void __init spec2_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s\n", reason); +} + +static void __init spec2_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s\n", reason); +} + +static inline bool retp_compiler(void) +{ + return __is_defined(RETPOLINE); +} + +static inline bool match_option(const char *arg, int arglen, const char *opt) +{ + int len = strlen(opt); + + return len == arglen && !strncmp(arg, opt, len); +} + +static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) +{ + char arg[20]; + int ret; + + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, + sizeof(arg)); + if (ret > 0) { + if (match_option(arg, ret, "off")) { + goto disable; + } else if (match_option(arg, ret, "on")) { + spec2_print_if_secure("force enabled on command line."); + return SPECTRE_V2_CMD_FORCE; + } else if (match_option(arg, ret, "retpoline")) { + spec2_print_if_insecure("retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE; + } else if (match_option(arg, ret, "retpoline,amd")) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + return SPECTRE_V2_CMD_AUTO; + } + spec2_print_if_insecure("AMD retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE_AMD; + } else if (match_option(arg, ret, "retpoline,generic")) { + spec2_print_if_insecure("generic retpoline selected on command line."); + return SPECTRE_V2_CMD_RETPOLINE_GENERIC; + } else if (match_option(arg, ret, "auto")) { + return SPECTRE_V2_CMD_AUTO; + } + } + + if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) + return SPECTRE_V2_CMD_AUTO; +disable: + spec2_print_if_insecure("disabled on command line."); + return SPECTRE_V2_CMD_NONE; +} + +static void __init spectre_v2_select_mitigation(void) +{ + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); + enum spectre_v2_mitigation mode = SPECTRE_V2_NONE; + + /* + * If the CPU is not affected and the command line mode is NONE or AUTO + * then nothing to do. + */ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) && + (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO)) + return; + + switch (cmd) { + case SPECTRE_V2_CMD_NONE: + return; + + case SPECTRE_V2_CMD_FORCE: + /* FALLTRHU */ + case SPECTRE_V2_CMD_AUTO: + goto retpoline_auto; + + case SPECTRE_V2_CMD_RETPOLINE_AMD: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_amd; + break; + case SPECTRE_V2_CMD_RETPOLINE_GENERIC: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_generic; + break; + case SPECTRE_V2_CMD_RETPOLINE: + if (IS_ENABLED(CONFIG_RETPOLINE)) + goto retpoline_auto; + break; + } + pr_err("kernel not compiled with retpoline; no mitigation available!"); + return; + +retpoline_auto: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + retpoline_amd: + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("LFENCE not serializing. Switching to generic retpoline\n"); + goto retpoline_generic; + } + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : + SPECTRE_V2_RETPOLINE_MINIMAL_AMD; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } else { + retpoline_generic: + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : + SPECTRE_V2_RETPOLINE_MINIMAL; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + } + + spectre_v2_enabled = mode; + pr_info("%s\n", spectre_v2_strings[mode]); +} + +#undef pr_fmt + +#ifdef CONFIG_SYSFS +ssize_t cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + return sprintf(buf, "Not affected\n"); + if (boot_cpu_has(X86_FEATURE_PTI)) + return sprintf(buf, "Mitigation: PTI\n"); + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) + return sprintf(buf, "Not affected\n"); + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); +} +#endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fa998ca8aa5aa5b4899dbe8a57c5b543f927009e..ef29ad001991d6acdd5b59cd707d85f9ff99f9ea 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) return NULL; /* Not found */ } -__u32 cpu_caps_cleared[NCAPINTS]; -__u32 cpu_caps_set[NCAPINTS]; +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; +__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; void load_percpu_segment(int cpu) { @@ -490,28 +490,23 @@ void load_percpu_segment(int cpu) load_stack_canary_segment(); } -/* Setup the fixmap mapping only once per-processor */ -static inline void setup_fixmap_gdt(int cpu) -{ -#ifdef CONFIG_X86_64 - /* On 64-bit systems, we use a read-only fixmap GDT. */ - pgprot_t prot = PAGE_KERNEL_RO; -#else - /* - * On native 32-bit systems, the GDT cannot be read-only because - * our double fault handler uses a task gate, and entering through - * a task gate needs to change an available TSS to busy. If the GDT - * is read-only, that will triple fault. - * - * On Xen PV, the GDT must be read-only because the hypervisor requires - * it. - */ - pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ? - PAGE_KERNEL_RO : PAGE_KERNEL; +#ifdef CONFIG_X86_32 +/* The 32-bit entry code needs to find cpu_entry_area. */ +DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); #endif - __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot); -} +#ifdef CONFIG_X86_64 +/* + * Special IST stacks which the CPU switches to when it calls + * an IST-marked descriptor entry. Up to 7 stacks (hardware + * limit), all of them are 4K, except the debug stack which + * is 8K. + */ +static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ +}; +#endif /* Load the original GDT from the per-cpu structure */ void load_direct_gdt(int cpu) @@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) { int i; - for (i = 0; i < NCAPINTS; i++) { + for (i = 0; i < NCAPINTS + NBUGINTS; i++) { c->x86_capability[i] &= ~cpu_caps_cleared[i]; c->x86_capability[i] |= cpu_caps_set[i]; } @@ -927,6 +922,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) } setup_force_cpu_cap(X86_FEATURE_ALWAYS); + + if (c->x86_vendor != X86_VENDOR_AMD) + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); + + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + fpu__init_system(c); #ifdef CONFIG_X86_32 @@ -1250,7 +1252,7 @@ void enable_sep_cpu(void) return; cpu = get_cpu(); - tss = &per_cpu(cpu_tss, cpu); + tss = &per_cpu(cpu_tss_rw, cpu); /* * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- @@ -1259,11 +1261,7 @@ void enable_sep_cpu(void) tss->x86_tss.ss1 = __KERNEL_CS; wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); - - wrmsr(MSR_IA32_SYSENTER_ESP, - (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack), - 0); - + wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0); wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); put_cpu(); @@ -1357,25 +1355,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); -/* - * Special IST stacks which the CPU switches to when it calls - * an IST-marked descriptor entry. Up to 7 stacks (hardware - * limit), all of them are 4K, except the debug stack which - * is 8K. - */ -static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, - [DEBUG_STACK - 1] = DEBUG_STKSZ -}; - -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); - /* May not be marked __init: used by software suspend */ void syscall_init(void) { + extern char _entry_trampoline[]; + extern char entry_SYSCALL_64_trampoline[]; + + int cpu = smp_processor_id(); + unsigned long SYSCALL64_entry_trampoline = + (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline + + (entry_SYSCALL_64_trampoline - _entry_trampoline); + wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); - wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); + if (static_cpu_has(X86_FEATURE_PTI)) + wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); + else + wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); #ifdef CONFIG_IA32_EMULATION wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); @@ -1386,7 +1381,7 @@ void syscall_init(void) * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). */ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1)); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); #else wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); @@ -1530,7 +1525,7 @@ void cpu_init(void) if (cpu) load_ucode_ap(); - t = &per_cpu(cpu_tss, cpu); + t = &per_cpu(cpu_tss_rw, cpu); oist = &per_cpu(orig_ist, cpu); #ifdef CONFIG_NUMA @@ -1569,7 +1564,7 @@ void cpu_init(void) * set up and load the per-CPU TSS */ if (!oist->ist[0]) { - char *estacks = per_cpu(exception_stacks, cpu); + char *estacks = get_cpu_entry_area(cpu)->exception_stacks; for (v = 0; v < N_EXCEPTION_STACKS; v++) { estacks += exception_stack_sizes[v]; @@ -1580,7 +1575,7 @@ void cpu_init(void) } } - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; /* * <= is required because the CPU will access up to @@ -1596,11 +1591,12 @@ void cpu_init(void) enter_lazy_tlb(&init_mm, me); /* - * Initialize the TSS. Don't bother initializing sp0, as the initial - * task never enters user mode. + * Initialize the TSS. sp0 points to the entry trampoline stack + * regardless of what task is running. */ - set_tss_desc(cpu, t); + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); + load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1)); load_mm_ldt(&init_mm); @@ -1612,7 +1608,6 @@ void cpu_init(void) if (is_uv_system()) uv_cpu_init(); - setup_fixmap_gdt(cpu); load_fixmap_gdt(cpu); } @@ -1622,7 +1617,7 @@ void cpu_init(void) { int cpu = smp_processor_id(); struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(cpu_tss, cpu); + struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu); wait_for_master_cpu(cpu); @@ -1657,12 +1652,12 @@ void cpu_init(void) * Initialize the TSS. Don't bother initializing sp0, as the initial * task never enters user mode. */ - set_tss_desc(cpu, t); + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); load_mm_ldt(&init_mm); - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; #ifdef CONFIG_DOUBLEFAULT /* Set up doublefault TSS pointer in the GDT */ @@ -1674,7 +1669,6 @@ void cpu_init(void) fpu__init_cpu(); - setup_fixmap_gdt(cpu); load_fixmap_gdt(cpu); } #endif diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index c6daec4bdba5b180e45c5f78019fcba7b2880428..330b8462d426faad0dccdc480f34eec34cd8b92f 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, #define F14H_MPB_MAX_SIZE 1824 #define F15H_MPB_MAX_SIZE 4096 #define F16H_MPB_MAX_SIZE 3458 +#define F17H_MPB_MAX_SIZE 3200 switch (family) { case 0x14: @@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, case 0x16: max_size = F16H_MPB_MAX_SIZE; break; + case 0x17: + max_size = F17H_MPB_MAX_SIZE; + break; default: max_size = F1XH_MPB_MAX_SIZE; break; diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 7dbcb7adf7975f7f29c38651c23c478ad315a34c..d9e460fc7a3b309327c8f2899cc54a7be2b9dc6b 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci) } #else -/* - * Flush global tlb. We only do this in x86_64 where paging has been enabled - * already and PGE should be enabled as well. - */ -static inline void flush_tlb_early(void) -{ - __native_flush_tlb_global_irq_disabled(); -} - static inline void print_ucode(struct ucode_cpu_info *uci) { struct microcode_intel *mc; @@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) if (rev != mc->hdr.rev) return -1; -#ifdef CONFIG_X86_64 - /* Flush global tlb. This is precaution. */ - flush_tlb_early(); -#endif uci->cpu_sig.rev = rev; if (early) @@ -923,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); - if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) { - pr_err_once("late loading on model 79 is disabled.\n"); + /* + * Late loading on model 79 with microcode revision less than 0x0b000021 + * may result in a system hang. This behavior is documented in item + * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family). + */ + if (c->x86 == 6 && + c->x86_model == INTEL_FAM6_BROADWELL_X && + c->x86_mask == 0x01 && + c->microcode < 0x0b000021) { + pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); + pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); return true; } diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index 0e662c55ae902fedd5c78c1ed87a972b35a79856..0b8cedb20d6d92f2875a49292680c8cfecd5b044 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -50,25 +50,23 @@ static void doublefault_fn(void) cpu_relax(); } -struct tss_struct doublefault_tss __cacheline_aligned = { - .x86_tss = { - .sp0 = STACK_START, - .ss0 = __KERNEL_DS, - .ldt = 0, - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, - - .ip = (unsigned long) doublefault_fn, - /* 0x2 bit is always set */ - .flags = X86_EFLAGS_SF | 0x2, - .sp = STACK_START, - .es = __USER_DS, - .cs = __KERNEL_CS, - .ss = __KERNEL_DS, - .ds = __USER_DS, - .fs = __KERNEL_PERCPU, - - .__cr3 = __pa_nodebug(swapper_pg_dir), - } +struct x86_hw_tss doublefault_tss __cacheline_aligned = { + .sp0 = STACK_START, + .ss0 = __KERNEL_DS, + .ldt = 0, + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, + + .ip = (unsigned long) doublefault_fn, + /* 0x2 bit is always set */ + .flags = X86_EFLAGS_SF | 0x2, + .sp = STACK_START, + .es = __USER_DS, + .cs = __KERNEL_CS, + .ss = __KERNEL_DS, + .ds = __USER_DS, + .fs = __KERNEL_PERCPU, + + .__cr3 = __pa_nodebug(swapper_pg_dir), }; /* dummy for do_double_fault() call */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index f13b4c00a5de4b7a7b36c40d27311672bcc9d05c..afbecff161d162c11a82d0610ae406b910c3f791 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, return true; } +bool in_entry_stack(unsigned long *stack, struct stack_info *info) +{ + struct entry_stack *ss = cpu_entry_stack(smp_processor_id()); + + void *begin = ss; + void *end = ss + 1; + + if ((void *)stack < begin || (void *)stack >= end) + return false; + + info->type = STACK_TYPE_ENTRY; + info->begin = begin; + info->end = end; + info->next_sp = NULL; + + return true; +} + static void printk_stack_address(unsigned long address, int reliable, char *log_lvl) { @@ -50,6 +69,39 @@ static void printk_stack_address(unsigned long address, int reliable, printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); } +void show_iret_regs(struct pt_regs *regs) +{ + printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip); + printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss, + regs->sp, regs->flags); +} + +static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, + bool partial) +{ + /* + * These on_stack() checks aren't strictly necessary: the unwind code + * has already validated the 'regs' pointer. The checks are done for + * ordering reasons: if the registers are on the next stack, we don't + * want to print them out yet. Otherwise they'll be shown as part of + * the wrong stack. Later, when show_trace_log_lvl() switches to the + * next stack, this function will be called again with the same regs so + * they can be printed in the right context. + */ + if (!partial && on_stack(info, regs, sizeof(*regs))) { + __show_regs(regs, 0); + + } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET, + IRET_FRAME_SIZE)) { + /* + * When an interrupt or exception occurs in entry code, the + * full pt_regs might not have been saved yet. In that case + * just print the iret frame. + */ + show_iret_regs(regs); + } +} + void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, char *log_lvl) { @@ -57,11 +109,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, struct stack_info stack_info = {0}; unsigned long visit_mask = 0; int graph_idx = 0; + bool partial; printk("%sCall Trace:\n", log_lvl); unwind_start(&state, task, regs, stack); stack = stack ? : get_stack_pointer(task, regs); + regs = unwind_get_entry_regs(&state, &partial); /* * Iterate through the stacks, starting with the current stack pointer. @@ -71,31 +125,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, * - task stack * - interrupt stack * - HW exception stacks (double fault, nmi, debug, mce) + * - entry stack * - * x86-32 can have up to three stacks: + * x86-32 can have up to four stacks: * - task stack * - softirq stack * - hardirq stack + * - entry stack */ - for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { + for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { const char *stack_name; - /* - * If we overflowed the task stack into a guard page, jump back - * to the bottom of the usable stack. - */ - if (task_stack_page(task) - (void *)stack < PAGE_SIZE) - stack = task_stack_page(task); - - if (get_stack_info(stack, task, &stack_info, &visit_mask)) - break; + if (get_stack_info(stack, task, &stack_info, &visit_mask)) { + /* + * We weren't on a valid stack. It's possible that + * we overflowed a valid stack into a guard page. + * See if the next page up is valid so that we can + * generate some kind of backtrace if this happens. + */ + stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack); + if (get_stack_info(stack, task, &stack_info, &visit_mask)) + break; + } stack_name = stack_type_name(stack_info.type); if (stack_name) printk("%s <%s>\n", log_lvl, stack_name); - if (regs && on_stack(&stack_info, regs, sizeof(*regs))) - __show_regs(regs, 0); + if (regs) + show_regs_if_on_stack(&stack_info, regs, partial); /* * Scan the stack, printing any text addresses we find. At the @@ -119,7 +177,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, /* * Don't print regs->ip again if it was already printed - * by __show_regs() below. + * by show_regs_if_on_stack(). */ if (regs && stack == ®s->ip) goto next; @@ -154,9 +212,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unwind_next_frame(&state); /* if the frame has entry regs, print them */ - regs = unwind_get_entry_regs(&state); - if (regs && on_stack(&stack_info, regs, sizeof(*regs))) - __show_regs(regs, 0); + regs = unwind_get_entry_regs(&state, &partial); + if (regs) + show_regs_if_on_stack(&stack_info, regs, partial); } if (stack_name) @@ -252,11 +310,13 @@ int __die(const char *str, struct pt_regs *regs, long err) unsigned long sp; #endif printk(KERN_DEFAULT - "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter, + "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter, IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", IS_ENABLED(CONFIG_SMP) ? " SMP" : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", - IS_ENABLED(CONFIG_KASAN) ? " KASAN" : ""); + IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", + IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? + (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index daefae83a3aa86c59602b75bd3e6734c6e3b1030..04170f63e3a1d567caac3deea641e014b7e10823 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_SOFTIRQ) return "SOFTIRQ"; + if (type == STACK_TYPE_ENTRY) + return "ENTRY_TRAMPOLINE"; + return NULL; } @@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (task != current) goto unknown; + if (in_entry_stack(stack, info)) + goto recursion_check; + if (in_hardirq_stack(stack, info)) goto recursion_check; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 88ce2ffdb110303502ad33e64d357d8af5afd8c6..563e28d14f2ca157178d9de3a139d8370aaf89fe 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type) if (type == STACK_TYPE_IRQ) return "IRQ"; + if (type == STACK_TYPE_ENTRY) { + /* + * On 64-bit, we have a generic entry stack that we + * use for all the kernel entry points, including + * SYSENTER. + */ + return "ENTRY_TRAMPOLINE"; + } + if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) return exception_stack_names[type - STACK_TYPE_EXCEPTION]; @@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, if (in_irq_stack(stack, info)) goto recursion_check; + if (in_entry_stack(stack, info)) + goto recursion_check; + goto unknown; recursion_check: diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index b6c6468e10bc96625c25c4ae8d19b734af81366b..4c8440de33559942a98245cf123a5ed10a30e406 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef CC_USING_FENTRY # define function_hook __fentry__ @@ -197,7 +198,8 @@ ftrace_stub: movl 0x4(%ebp), %edx subl $MCOUNT_INSN_SIZE, %eax - call *ftrace_trace_function + movl ftrace_trace_function, %ecx + CALL_NOSPEC %ecx popl %edx popl %ecx @@ -241,5 +243,5 @@ return_to_handler: movl %eax, %ecx popl %edx popl %eax - jmp *%ecx + JMP_NOSPEC %ecx #endif diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index c832291d948a6b4fd487a3e42fab8b8f7b9dd244..7cb8ba08beb997ef66724e2db5e66021c5ce32ee 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -7,7 +7,7 @@ #include #include #include - +#include .code64 .section .entry.text, "ax" @@ -286,8 +286,8 @@ trace: * ip and parent ip are used and the list function is called when * function tracing is enabled. */ - call *ftrace_trace_function - + movq ftrace_trace_function, %r8 + CALL_NOSPEC %r8 restore_mcount_regs jmp fgraph_trace @@ -329,5 +329,5 @@ GLOBAL(return_to_handler) movq 8(%rsp), %rdx movq (%rsp), %rax addq $24, %rsp - jmp *%rdi + JMP_NOSPEC %rdi #endif diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7dca675fe78db60c5d79bc450bbd14bfee35cfc2..04a625f0fcda322dab7c9d8459a19ee56b71c936 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag) .balign PAGE_SIZE; \ GLOBAL(name) +#ifdef CONFIG_PAGE_TABLE_ISOLATION +/* + * Each PGD needs to be 8k long and 8k aligned. We do not + * ever go out to userspace with these, so we do not + * strictly *need* the second page, but this allows us to + * have a single set_pgd() implementation that does not + * need to worry about whether it has 4k or 8k to work + * with. + * + * This ensures PGDs are 8k long: + */ +#define PTI_USER_PGD_FILL 512 +/* This ensures they are 8k-aligned: */ +#define NEXT_PGD_PAGE(name) \ + .balign 2 * PAGE_SIZE; \ +GLOBAL(name) +#else +#define NEXT_PGD_PAGE(name) NEXT_PAGE(name) +#define PTI_USER_PGD_FILL 0 +#endif + /* Automate the creation of 1 to 1 mapping pmd entries */ #define PMDS(START, PERM, COUNT) \ i = 0 ; \ @@ -350,13 +371,14 @@ GLOBAL(name) .endr __INITDATA -NEXT_PAGE(early_top_pgt) +NEXT_PGD_PAGE(early_top_pgt) .fill 511,8,0 #ifdef CONFIG_X86_5LEVEL .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC #else .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC #endif + .fill PTI_USER_PGD_FILL,8,0 NEXT_PAGE(early_dynamic_pgts) .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 @@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts) .data #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) -NEXT_PAGE(init_top_pgt) +NEXT_PGD_PAGE(init_top_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .org init_top_pgt + PGD_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC + .fill PTI_USER_PGD_FILL,8,0 NEXT_PAGE(level3_ident_pgt) .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC @@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt) */ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) #else -NEXT_PAGE(init_top_pgt) +NEXT_PGD_PAGE(init_top_pgt) .fill 512,8,0 + .fill PTI_USER_PGD_FILL,8,0 #endif #ifdef CONFIG_X86_5LEVEL diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 3feb648781c470a7a49ee26749712ba7da891fe9..2f723301eb58fc5ad0d6796b342446ae2ee0c9e6 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) * because the ->io_bitmap_max value must match the bitmap * contents: */ - tss = &per_cpu(cpu_tss, get_cpu()); + tss = &per_cpu(cpu_tss_rw, get_cpu()); if (turn_on) bitmap_clear(t->io_bitmap_ptr, from, num); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 49cfd9fe7589fa5ef2bef5d4a5d6431b7007836f..68e1867cca8045d0ed728ffc6b75a866c25484ed 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) /* high bit used in ret_from_ code */ unsigned vector = ~regs->orig_ax; - /* - * NB: Unlike exception entries, IRQ entries do not reliably - * handle context tracking in the low-level entry code. This is - * because syscall entries execute briefly with IRQs on before - * updating context tracking state, so we can take an IRQ from - * kernel mode with CONTEXT_USER. The low-level entry code only - * updates the context if we came from user mode, so we won't - * switch to CONTEXT_KERNEL. We'll fix that once the syscall - * code is cleaned up enough that we can cleanly defer enabling - * IRQs. - */ - entering_irq(); /* entering_irq() tells RCU that we're not quiescent. Check it. */ diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index a83b3346a0e104833793687aea68f7f216cf0374..c1bdbd3d3232cb83d07bfa4ce604ae0b620c796a 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -20,6 +20,7 @@ #include #include +#include #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack); static void call_on_stack(void *func, void *stack) { asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" + CALL_NOSPEC "movl %%ebx,%%esp \n" : "=b" (stack) : "0" (stack), - "D"(func) + [thunk_target] "D"(func) : "memory", "cc", "edx", "ecx", "eax"); } @@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) call_on_stack(print_stack_overflow, isp); asm volatile("xchgl %%ebx,%%esp \n" - "call *%%edi \n" + CALL_NOSPEC "movl %%ebx,%%esp \n" : "=a" (arg1), "=b" (isp) : "0" (desc), "1" (isp), - "D" (desc->handle_irq) + [thunk_target] "D" (desc->handle_irq) : "memory", "cc", "ecx"); return 1; } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 020efbf5786b35d343a8632cd14ac4f800465d9b..d86e344f5b3debfed504b72a7c0f83f36fe16387 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs) if (regs->sp >= estack_top && regs->sp <= estack_bottom) return; - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", current->comm, curbase, regs->sp, irq_stack_top, irq_stack_bottom, - estack_top, estack_bottom); + estack_top, estack_bottom, (void *)regs->ip); if (sysctl_panic_on_stackoverflow) panic("low stack detected by irq handler - check messages\n"); diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 1c1eae9613406b14c3154065e1fd036f985a384c..26d713ecad34a847e650638d02442eda04379f5e 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -5,6 +5,11 @@ * Copyright (C) 2002 Andi Kleen * * This handles calls from both 32bit and 64bit mode. + * + * Lock order: + * contex.ldt_usr_sem + * mmap_sem + * context.lock */ #include @@ -19,6 +24,7 @@ #include #include +#include #include #include #include @@ -42,17 +48,15 @@ static void refresh_ldt_segments(void) #endif } -/* context.lock is held for us, so we don't need any locking. */ +/* context.lock is held by the task which issued the smp function call */ static void flush_ldt(void *__mm) { struct mm_struct *mm = __mm; - mm_context_t *pc; if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm) return; - pc = &mm->context; - set_ldt(pc->ldt->entries, pc->ldt->nr_entries); + load_mm_ldt(mm); refresh_ldt_segments(); } @@ -89,25 +93,143 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) return NULL; } + /* The new LDT isn't aliased for PTI yet. */ + new_ldt->slot = -1; + new_ldt->nr_entries = num_entries; return new_ldt; } +/* + * If PTI is enabled, this maps the LDT into the kernelmode and + * usermode tables for the given mm. + * + * There is no corresponding unmap function. Even if the LDT is freed, we + * leave the PTEs around until the slot is reused or the mm is destroyed. + * This is harmless: the LDT is always in ordinary memory, and no one will + * access the freed slot. + * + * If we wanted to unmap freed LDTs, we'd also need to do a flush to make + * it useful, and the flush would slow down modify_ldt(). + */ +static int +map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + bool is_vmalloc, had_top_level_entry; + unsigned long va; + spinlock_t *ptl; + pgd_t *pgd; + int i; + + if (!static_cpu_has(X86_FEATURE_PTI)) + return 0; + + /* + * Any given ldt_struct should have map_ldt_struct() called at most + * once. + */ + WARN_ON(ldt->slot != -1); + + /* + * Did we already have the top level entry allocated? We can't + * use pgd_none() for this because it doens't do anything on + * 4-level page table kernels. + */ + pgd = pgd_offset(mm, LDT_BASE_ADDR); + had_top_level_entry = (pgd->pgd != 0); + + is_vmalloc = is_vmalloc_addr(ldt->entries); + + for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) { + unsigned long offset = i << PAGE_SHIFT; + const void *src = (char *)ldt->entries + offset; + unsigned long pfn; + pte_t pte, *ptep; + + va = (unsigned long)ldt_slot_va(slot) + offset; + pfn = is_vmalloc ? vmalloc_to_pfn(src) : + page_to_pfn(virt_to_page(src)); + /* + * Treat the PTI LDT range as a *userspace* range. + * get_locked_pte() will allocate all needed pagetables + * and account for them in this mm. + */ + ptep = get_locked_pte(mm, va, &ptl); + if (!ptep) + return -ENOMEM; + /* + * Map it RO so the easy to find address is not a primary + * target via some kernel interface which misses a + * permission check. + */ + pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL)); + set_pte_at(mm, va, ptep, pte); + pte_unmap_unlock(ptep, ptl); + } + + if (mm->context.ldt) { + /* + * We already had an LDT. The top-level entry should already + * have been allocated and synchronized with the usermode + * tables. + */ + WARN_ON(!had_top_level_entry); + if (static_cpu_has(X86_FEATURE_PTI)) + WARN_ON(!kernel_to_user_pgdp(pgd)->pgd); + } else { + /* + * This is the first time we're mapping an LDT for this process. + * Sync the pgd to the usermode tables. + */ + WARN_ON(had_top_level_entry); + if (static_cpu_has(X86_FEATURE_PTI)) { + WARN_ON(kernel_to_user_pgdp(pgd)->pgd); + set_pgd(kernel_to_user_pgdp(pgd), *pgd); + } + } + + va = (unsigned long)ldt_slot_va(slot); + flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0); + + ldt->slot = slot; +#endif + return 0; +} + +static void free_ldt_pgtables(struct mm_struct *mm) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + struct mmu_gather tlb; + unsigned long start = LDT_BASE_ADDR; + unsigned long end = start + (1UL << PGDIR_SHIFT); + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + tlb_gather_mmu(&tlb, mm, start, end); + free_pgd_range(&tlb, start, end, start, end); + tlb_finish_mmu(&tlb, start, end); +#endif +} + /* After calling this, the LDT is immutable. */ static void finalize_ldt_struct(struct ldt_struct *ldt) { paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); } -/* context.lock is held */ -static void install_ldt(struct mm_struct *current_mm, - struct ldt_struct *ldt) +static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt) { + mutex_lock(&mm->context.lock); + /* Synchronizes with READ_ONCE in load_mm_ldt. */ - smp_store_release(¤t_mm->context.ldt, ldt); + smp_store_release(&mm->context.ldt, ldt); - /* Activate the LDT for all CPUs using current_mm. */ - on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true); + /* Activate the LDT for all CPUs using currents mm. */ + on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true); + + mutex_unlock(&mm->context.lock); } static void free_ldt_struct(struct ldt_struct *ldt) @@ -124,27 +246,20 @@ static void free_ldt_struct(struct ldt_struct *ldt) } /* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. + * Called on fork from arch_dup_mmap(). Just copy the current LDT state, + * the new task is not running, so nothing can be installed. */ -int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) +int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm) { struct ldt_struct *new_ldt; - struct mm_struct *old_mm; int retval = 0; - mutex_init(&mm->context.lock); - old_mm = current->mm; - if (!old_mm) { - mm->context.ldt = NULL; + if (!old_mm) return 0; - } mutex_lock(&old_mm->context.lock); - if (!old_mm->context.ldt) { - mm->context.ldt = NULL; + if (!old_mm->context.ldt) goto out_unlock; - } new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); if (!new_ldt) { @@ -156,6 +271,12 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) new_ldt->nr_entries * LDT_ENTRY_SIZE); finalize_ldt_struct(new_ldt); + retval = map_ldt_struct(mm, new_ldt, 0); + if (retval) { + free_ldt_pgtables(mm); + free_ldt_struct(new_ldt); + goto out_unlock; + } mm->context.ldt = new_ldt; out_unlock: @@ -174,13 +295,18 @@ void destroy_context_ldt(struct mm_struct *mm) mm->context.ldt = NULL; } +void ldt_arch_exit_mmap(struct mm_struct *mm) +{ + free_ldt_pgtables(mm); +} + static int read_ldt(void __user *ptr, unsigned long bytecount) { struct mm_struct *mm = current->mm; unsigned long entries_size; int retval; - mutex_lock(&mm->context.lock); + down_read(&mm->context.ldt_usr_sem); if (!mm->context.ldt) { retval = 0; @@ -209,7 +335,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount) retval = bytecount; out_unlock: - mutex_unlock(&mm->context.lock); + up_read(&mm->context.ldt_usr_sem); return retval; } @@ -269,7 +395,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) ldt.avl = 0; } - mutex_lock(&mm->context.lock); + if (down_write_killable(&mm->context.ldt_usr_sem)) + return -EINTR; old_ldt = mm->context.ldt; old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; @@ -286,12 +413,31 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) new_ldt->entries[ldt_info.entry_number] = ldt; finalize_ldt_struct(new_ldt); + /* + * If we are using PTI, map the new LDT into the userspace pagetables. + * If there is already an LDT, use the other slot so that other CPUs + * will continue to use the old LDT until install_ldt() switches + * them over to the new LDT. + */ + error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0); + if (error) { + /* + * This only can fail for the first LDT setup. If an LDT is + * already installed then the PTE page is already + * populated. Mop up a half populated page table. + */ + if (!WARN_ON_ONCE(old_ldt)) + free_ldt_pgtables(mm); + free_ldt_struct(new_ldt); + goto out_unlock; + } + install_ldt(mm, new_ldt); free_ldt_struct(old_ldt); error = 0; out_unlock: - mutex_unlock(&mm->context.lock); + up_write(&mm->context.ldt_usr_sem); out: return error; } diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 00bc751c861ce8fa42c1b93d39d029694f4e328d..edfede76868870e4cf86fed553377fd6bd0cac06 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -48,8 +48,6 @@ static void load_segments(void) "\tmovl $"STR(__KERNEL_DS)",%%eax\n" "\tmovl %%eax,%%ds\n" "\tmovl %%eax,%%es\n" - "\tmovl %%eax,%%fs\n" - "\tmovl %%eax,%%gs\n" "\tmovl %%eax,%%ss\n" : : : "eax", "memory"); #undef STR @@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image) * The gdt & idt are now invalid. * If you want to load them you must set up your own idt & gdt. */ - set_gdt(phys_to_virt(0), 0); idt_invalidate(phys_to_virt(0)); + set_gdt(phys_to_virt(0), 0); /* now call it */ image->start = relocate_kernel_ptr((unsigned long)image->head, diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index ac0be8283325edfdc2752f862b4c0cef208a931c..9edadabf04f66c657f8a29bb56fe994b2559d5cf 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); -DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); @@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_mmu_ops, read_cr2); PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); - PATCH_SITE(pv_mmu_ops, flush_tlb_single); PATCH_SITE(pv_cpu_ops, wbinvd); #if defined(CONFIG_PARAVIRT_SPINLOCKS) case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 97fb3e5737f5d0b5d50f8d9232726923c2692e65..832a6acd730fad70e1426052d502f6438b30e38e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -47,7 +47,7 @@ * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { +__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = { .x86_tss = { /* * .sp0 is only used when entering ring 0 from a lower @@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { * Poison it. */ .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, + +#ifdef CONFIG_X86_64 + /* + * .sp1 is cpu_current_top_of_stack. The init task never + * runs user code, but cpu_current_top_of_stack should still + * be well defined before the first context switch. + */ + .sp1 = TOP_OF_INIT_STACK, +#endif + #ifdef CONFIG_X86_32 .ss0 = __KERNEL_DS, .ss1 = __KERNEL_CS, @@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { */ .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, #endif -#ifdef CONFIG_X86_32 - .SYSENTER_stack_canary = STACK_END_MAGIC, -#endif }; -EXPORT_PER_CPU_SYMBOL(cpu_tss); +EXPORT_PER_CPU_SYMBOL(cpu_tss_rw); DEFINE_PER_CPU(bool, __tss_limit_invalid); EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); @@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk) struct fpu *fpu = &t->fpu; if (bp) { - struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); + struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu()); t->io_bitmap_ptr = NULL; clear_thread_flag(TIF_IO_BITMAP); @@ -299,7 +306,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, } if ((tifp ^ tifn) & _TIF_NOTSC) - cr4_toggle_bits(X86_CR4_TSD); + cr4_toggle_bits_irqsoff(X86_CR4_TSD); if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 45bf0c5f93e15103060d67d5245756ab72ce8fe5..5224c609918416337b97440eb2d515d8052463ae 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss, cpu); + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index eeeb34f85c250e8c01188b6d32cf5a62bd1af8a0..c754662320163107ca3a254362ce0e404a8d3c11 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all) unsigned int fsindex, gsindex; unsigned int ds, cs, es; - printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip); - printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss, - regs->sp, regs->flags); + show_iret_regs(regs); + if (regs->orig_ax != -1) pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); else @@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", regs->r13, regs->r14, regs->r15); + if (!all) + return; + asm("movl %%ds,%0" : "=r" (ds)); asm("movl %%cs,%0" : "=r" (cs)); asm("movl %%es,%0" : "=r" (es)); @@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all) rdmsrl(MSR_GS_BASE, gs); rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); - if (!all) - return; - cr0 = read_cr0(); cr2 = read_cr2(); cr3 = __read_cr3(); @@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss, cpu); + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); @@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Switch the PDA and FPU contexts. */ this_cpu_write(current_task, next_p); + this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); /* Reload sp0. */ update_sp0(next_p); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8af2e8d0c0a1d2d0290ff2026afeab056cdc59b6..145810b0edf6738b57a0e6560c53392ae54a82e7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -906,9 +906,6 @@ void __init setup_arch(char **cmdline_p) set_bit(EFI_BOOT, &efi.flags); set_bit(EFI_64BIT, &efi.flags); } - - if (efi_enabled(EFI_BOOT)) - efi_memblock_x86_reserve_range(); #endif x86_init.oem.arch_setup(); @@ -962,6 +959,8 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); + if (efi_enabled(EFI_BOOT)) + efi_memblock_x86_reserve_range(); #ifdef CONFIG_MEMORY_HOTPLUG /* * Memory used by the kernel cannot be hot-removed because Linux diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3d01df7d7cf60cdbe1342fe84006405712394663..ed556d50d7ed6d71f03d202ef84a6b6b54e95a02 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -106,7 +106,7 @@ EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; /* Maximum number of SMT threads on any online core */ -int __max_smt_threads __read_mostly; +int __read_mostly __max_smt_threads = 1; /* Flag to indicate if a complete sched domain rebuild is required */ bool x86_topology_update; @@ -126,25 +126,16 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) spin_lock_irqsave(&rtc_lock, flags); CMOS_WRITE(0xa, 0xf); spin_unlock_irqrestore(&rtc_lock, flags); - local_flush_tlb(); - pr_debug("1.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = start_eip >> 4; - pr_debug("2.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = start_eip & 0xf; - pr_debug("3.\n"); } static inline void smpboot_restore_warm_reset_vector(void) { unsigned long flags; - /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - /* * Paranoid: Set warm reset code and vector here back * to default values. @@ -237,7 +228,7 @@ static void notrace start_secondary(void *unused) load_cr3(swapper_pg_dir); __flush_tlb_all(); #endif - + load_current_idt(); cpu_init(); x86_cpuinit.early_percpu_clock_init(); preempt_disable(); @@ -932,12 +923,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, initial_code = (unsigned long)start_secondary; initial_stack = idle->thread.sp; - /* - * Enable the espfix hack for this CPU - */ -#ifdef CONFIG_X86_ESPFIX64 + /* Enable the espfix hack for this CPU */ init_espfix_ap(cpu); -#endif /* So we see what's up */ announce_cpu(cpu, apicid); @@ -1304,7 +1291,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) * Today neither Intel nor AMD support heterogenous systems so * extrapolate the boot cpu's data to all packages. */ - ncpus = cpu_data(0).booted_cores * smp_num_siblings; + ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); pr_info("Max logical packages: %u\n", __max_logical_packages); diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 77835bc021c766744dd1976b4429141e3fb645fa..093f2ea5dd56b613d03ccaeeb87fd50900f64ba1 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -102,7 +102,7 @@ __save_stack_trace_reliable(struct stack_trace *trace, for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state); unwind_next_frame(&state)) { - regs = unwind_get_entry_regs(&state); + regs = unwind_get_entry_regs(&state, NULL); if (regs) { /* * Kernel mode registers on the stack indicate an @@ -164,8 +164,12 @@ int save_stack_trace_tsk_reliable(struct task_struct *tsk, { int ret; + /* + * If the task doesn't have a stack (e.g., a zombie), the stack is + * "reliably" empty. + */ if (!try_get_task_stack(tsk)) - return -EINVAL; + return 0; ret = __save_stack_trace_reliable(trace, tsk); diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index a4eb27918cebf99dc2415f7eec49c4838d6f9f41..a2486f4440734756d49a96313c0e2f9a174a87db 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -138,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, return -1; set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); pte_unmap(pte); + + /* + * PTI poisons low addresses in the kernel page tables in the + * name of making them unusable for userspace. To execute + * code at such a low address, the poison must be cleared. + * + * Note: 'pgd' actually gets set in p4d_alloc() _or_ + * pud_alloc() depending on 4/5-level paging. + */ + pgd->pgd &= ~_PAGE_NX; + return 0; } diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 9a9c9b076955dd493c7de80b8d814a27dd0fceb7..a5b802a1221272402b344d75ccf94bcff4b69b38 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx, cpu = get_cpu(); while (n-- > 0) { - if (LDT_empty(info) || LDT_zero(info)) { + if (LDT_empty(info) || LDT_zero(info)) memset(desc, 0, sizeof(*desc)); - } else { + else fill_ldt(desc, info); - - /* - * Always set the accessed bit so that the CPU - * doesn't try to write to the (read-only) GDT. - */ - desc->type |= 1; - } ++info; ++desc; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 989514c94a55d8fa93a07192edd199be1a607bf8..446c9ef8cfc32b68d77d4429128b3c794b3fc069 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -348,23 +349,42 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) /* * If IRET takes a non-IST fault on the espfix64 stack, then we - * end up promoting it to a doublefault. In that case, modify - * the stack to make it look like we just entered the #GP - * handler from user space, similar to bad_iret. + * end up promoting it to a doublefault. In that case, take + * advantage of the fact that we're not using the normal (TSS.sp0) + * stack right now. We can write a fake #GP(0) frame at TSS.sp0 + * and then modify our own IRET frame so that, when we return, + * we land directly at the #GP(0) vector with the stack already + * set up according to its expectations. + * + * The net result is that our #GP handler will think that we + * entered from usermode with the bad user context. * * No need for ist_enter here because we don't use RCU. */ - if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && + if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY && regs->cs == __KERNEL_CS && regs->ip == (unsigned long)native_irq_return_iret) { - struct pt_regs *normal_regs = task_pt_regs(current); + struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; - /* Fake a #GP(0) from userspace. */ - memmove(&normal_regs->ip, (void *)regs->sp, 5*8); - normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ + /* + * regs->sp points to the failing IRET frame on the + * ESPFIX64 stack. Copy it to the entry stack. This fills + * in gpregs->ss through gpregs->ip. + * + */ + memmove(&gpregs->ip, (void *)regs->sp, 5*8); + gpregs->orig_ax = 0; /* Missing (lost) #GP error code */ + + /* + * Adjust our frame so that we return straight to the #GP + * vector with the expected RSP value. This is safe because + * we won't enable interupts or schedule before we invoke + * general_protection, so nothing will clobber the stack + * frame we just set up. + */ regs->ip = (unsigned long)general_protection; - regs->sp = (unsigned long)&normal_regs->orig_ax; + regs->sp = (unsigned long)&gpregs->orig_ax; return; } @@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) * * Processors update CR2 whenever a page fault is detected. If a * second page fault occurs while an earlier page fault is being - * deliv- ered, the faulting linear address of the second fault will + * delivered, the faulting linear address of the second fault will * overwrite the contents of CR2 (replacing the previous * address). These updates to CR2 occur even if the page fault * results in a double fault or occurs during the delivery of a @@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3); #ifdef CONFIG_X86_64 /* - * Help handler running on IST stack to switch off the IST stack if the - * interrupted code was in user mode. The actual stack switch is done in - * entry_64.S + * Help handler running on a per-cpu (IST or entry trampoline) stack + * to switch to the normal thread stack if the interrupted code was in + * user mode. The actual stack switch is done in entry_64.S */ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) { - struct pt_regs *regs = task_pt_regs(current); - *regs = *eregs; + struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1; + if (regs != eregs) + *regs = *eregs; return regs; } NOKPROBE_SYMBOL(sync_regs); @@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) /* * This is called from entry_64.S early in handling a fault * caused by a bad iret to user mode. To handle the fault - * correctly, we want move our stack frame to task_pt_regs - * and we want to pretend that the exception came from the - * iret target. + * correctly, we want to move our stack frame to where it would + * be had we entered directly on the entry stack (rather than + * just below the IRET frame) and we want to pretend that the + * exception came from the IRET target. */ struct bad_iret_stack *new_stack = - container_of(task_pt_regs(current), - struct bad_iret_stack, regs); + (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; /* Copy the IRET target to the new stack. */ memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); @@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: -#if defined(CONFIG_X86_32) - /* - * This is the most likely code path that involves non-trivial use - * of the SYSENTER stack. Check that we haven't overrun it. - */ - WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC, - "Overran or corrupted SYSENTER stack\n"); -#endif ist_exit(regs); } NOKPROBE_SYMBOL(do_debug); @@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) void __init trap_init(void) { + /* Init cpu_entry_area before IST entries are set up */ + setup_cpu_entry_areas(); + idt_setup_traps(); /* @@ -936,8 +952,9 @@ void __init trap_init(void) * "sidt" instruction will not leak the location of the kernel, and * to defend the IDT against arbitrary memory write vulnerabilities. * It will be reloaded in cpu_init() */ - __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); - idt_descr.address = fix_to_virt(FIX_RO_IDT); + cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table), + PAGE_KERNEL_RO); + idt_descr.address = CPU_ENTRY_AREA_RO_IDT; /* * Should be a barrier for any external CPU state: diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index a3f973b2c97a03b121fe0173dbdc9298216721e6..be86a865087a6b9dc8e04031dbf2e2fbeeda1ed5 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) return NULL; } -static bool stack_access_ok(struct unwind_state *state, unsigned long addr, +static bool stack_access_ok(struct unwind_state *state, unsigned long _addr, size_t len) { struct stack_info *info = &state->stack_info; + void *addr = (void *)_addr; - /* - * If the address isn't on the current stack, switch to the next one. - * - * We may have to traverse multiple stacks to deal with the possibility - * that info->next_sp could point to an empty stack and the address - * could be on a subsequent stack. - */ - while (!on_stack(info, (void *)addr, len)) - if (get_stack_info(info->next_sp, state->task, info, - &state->stack_mask)) - return false; + if (!on_stack(info, addr, len) && + (get_stack_info(addr, state->task, info, &state->stack_mask))) + return false; return true; } @@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, return true; } -#define REGS_SIZE (sizeof(struct pt_regs)) -#define SP_OFFSET (offsetof(struct pt_regs, sp)) -#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip)) -#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip)) - static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, - unsigned long *ip, unsigned long *sp, bool full) + unsigned long *ip, unsigned long *sp) { - size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; - size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET; - struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE); - - if (IS_ENABLED(CONFIG_X86_64)) { - if (!stack_access_ok(state, addr, regs_size)) - return false; + struct pt_regs *regs = (struct pt_regs *)addr; - *ip = regs->ip; - *sp = regs->sp; + /* x86-32 support will be more complicated due to the ®s->sp hack */ + BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32)); - return true; - } - - if (!stack_access_ok(state, addr, sp_offset)) + if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) return false; *ip = regs->ip; + *sp = regs->sp; + return true; +} - if (user_mode(regs)) { - if (!stack_access_ok(state, addr + sp_offset, - REGS_SIZE - SP_OFFSET)) - return false; +static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr, + unsigned long *ip, unsigned long *sp) +{ + struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET; - *sp = regs->sp; - } else - *sp = (unsigned long)®s->sp; + if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) + return false; + *ip = regs->ip; + *sp = regs->sp; return true; } @@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state) unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; enum stack_type prev_type = state->stack_info.type; struct orc_entry *orc; - struct pt_regs *ptregs; bool indirect = false; if (unwind_done(state)) @@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state) break; case ORC_TYPE_REGS: - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { + if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) { orc_warn("can't dereference registers at %p for ip %pB\n", (void *)sp, (void *)orig_ip); goto done; @@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state) break; case ORC_TYPE_REGS_IRET: - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { + if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) { orc_warn("can't dereference iret registers at %p for ip %pB\n", (void *)sp, (void *)orig_ip); goto done; } - ptregs = container_of((void *)sp, struct pt_regs, ip); - if ((unsigned long)ptregs >= prev_sp && - on_stack(&state->stack_info, ptregs, REGS_SIZE)) { - state->regs = ptregs; - state->full_regs = false; - } else - state->regs = NULL; - + state->regs = (void *)sp - IRET_FRAME_OFFSET; + state->full_regs = false; state->signal = true; break; @@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, } if (get_stack_info((unsigned long *)state->sp, state->task, - &state->stack_info, &state->stack_mask)) - return; + &state->stack_info, &state->stack_mask)) { + /* + * We weren't on a valid stack. It's possible that + * we overflowed a valid stack into a guard page. + * See if the next page up is valid so that we can + * generate some kind of backtrace if this happens. + */ + void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp); + if (get_stack_info(next_page, state->task, &state->stack_info, + &state->stack_mask)) + return; + } /* * The caller can provide the address of the first frame directly diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index a4009fb9be8725ce7bda96cd5e8160e524903266..1e413a9326aaa152a47d51fa4c1d1ea03cbb4d94 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -61,11 +61,17 @@ jiffies_64 = jiffies; . = ALIGN(HPAGE_SIZE); \ __end_rodata_hpage_align = .; +#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); +#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); + #else #define X64_ALIGN_RODATA_BEGIN #define X64_ALIGN_RODATA_END +#define ALIGN_ENTRY_TEXT_BEGIN +#define ALIGN_ENTRY_TEXT_END + #endif PHDRS { @@ -102,11 +108,22 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + ALIGN_ENTRY_TEXT_BEGIN ENTRY_TEXT IRQENTRY_TEXT + ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) + +#ifdef CONFIG_X86_64 + . = ALIGN(PAGE_SIZE); + _entry_trampoline = .; + *(.entry_trampoline) + . = ALIGN(PAGE_SIZE); + ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); +#endif + /* End of text section */ _etext = .; } :text = 0x9090 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e7d04d0c8008d1a1d69966105ad855351a1f474f..b514b2b2845a334d4b53f28ed0b73c96f12d0e6a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1046,7 +1046,6 @@ static void fetch_register_operand(struct operand *op) static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) { - ctxt->ops->get_fpu(ctxt); switch (reg) { case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; @@ -1068,13 +1067,11 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) #endif default: BUG(); } - ctxt->ops->put_fpu(ctxt); } static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) { - ctxt->ops->get_fpu(ctxt); switch (reg) { case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; @@ -1096,12 +1093,10 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, #endif default: BUG(); } - ctxt->ops->put_fpu(ctxt); } static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) { - ctxt->ops->get_fpu(ctxt); switch (reg) { case 0: asm("movq %%mm0, %0" : "=m"(*data)); break; case 1: asm("movq %%mm1, %0" : "=m"(*data)); break; @@ -1113,12 +1108,10 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %%mm7, %0" : "=m"(*data)); break; default: BUG(); } - ctxt->ops->put_fpu(ctxt); } static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) { - ctxt->ops->get_fpu(ctxt); switch (reg) { case 0: asm("movq %0, %%mm0" : : "m"(*data)); break; case 1: asm("movq %0, %%mm1" : : "m"(*data)); break; @@ -1130,7 +1123,6 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) case 7: asm("movq %0, %%mm7" : : "m"(*data)); break; default: BUG(); } - ctxt->ops->put_fpu(ctxt); } static int em_fninit(struct x86_emulate_ctxt *ctxt) @@ -1138,9 +1130,7 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - ctxt->ops->get_fpu(ctxt); asm volatile("fninit"); - ctxt->ops->put_fpu(ctxt); return X86EMUL_CONTINUE; } @@ -1151,9 +1141,7 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - ctxt->ops->get_fpu(ctxt); asm volatile("fnstcw %0": "+m"(fcw)); - ctxt->ops->put_fpu(ctxt); ctxt->dst.val = fcw; @@ -1167,9 +1155,7 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt) if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) return emulate_nm(ctxt); - ctxt->ops->get_fpu(ctxt); asm volatile("fnstsw %0": "+m"(fsw)); - ctxt->ops->put_fpu(ctxt); ctxt->dst.val = fsw; @@ -2404,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n) } static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, - u64 cr0, u64 cr4) + u64 cr0, u64 cr3, u64 cr4) { int bad; + u64 pcid; + + /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */ + pcid = 0; + if (cr4 & X86_CR4_PCIDE) { + pcid = cr3 & 0xfff; + cr3 &= ~0xfff; + } + + bad = ctxt->ops->set_cr(ctxt, 3, cr3); + if (bad) + return X86EMUL_UNHANDLEABLE; /* * First enable PAE, long mode needs it before CR0.PG = 1 is set. @@ -2425,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, bad = ctxt->ops->set_cr(ctxt, 4, cr4); if (bad) return X86EMUL_UNHANDLEABLE; + if (pcid) { + bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid); + if (bad) + return X86EMUL_UNHANDLEABLE; + } + } return X86EMUL_CONTINUE; @@ -2435,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) struct desc_struct desc; struct desc_ptr dt; u16 selector; - u32 val, cr0, cr4; + u32 val, cr0, cr3, cr4; int i; cr0 = GET_SMSTATE(u32, smbase, 0x7ffc); - ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8)); + cr3 = GET_SMSTATE(u32, smbase, 0x7ff8); ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED; ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0); @@ -2481,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase) ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8)); - return rsm_enter_protected_mode(ctxt, cr0, cr4); + return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); } static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) { struct desc_struct desc; struct desc_ptr dt; - u64 val, cr0, cr4; + u64 val, cr0, cr3, cr4; u32 base3; u16 selector; int i, r; @@ -2505,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1); cr0 = GET_SMSTATE(u64, smbase, 0x7f58); - ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50)); + cr3 = GET_SMSTATE(u64, smbase, 0x7f50); cr4 = GET_SMSTATE(u64, smbase, 0x7f48); ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00)); val = GET_SMSTATE(u64, smbase, 0x7ed0); @@ -2533,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) dt.address = GET_SMSTATE(u64, smbase, 0x7e68); ctxt->ops->set_gdt(ctxt, &dt); - r = rsm_enter_protected_mode(ctxt, cr0, cr4); + r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); if (r != X86EMUL_CONTINUE) return r; @@ -4001,12 +4005,8 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - ctxt->ops->get_fpu(ctxt); - rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); - ctxt->ops->put_fpu(ctxt); - if (rc != X86EMUL_CONTINUE) return rc; @@ -4049,8 +4049,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; - ctxt->ops->get_fpu(ctxt); - if (size < __fxstate_size(16)) { rc = fxregs_fixup(&fx_state, size); if (rc != X86EMUL_CONTINUE) @@ -4066,8 +4064,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); out: - ctxt->ops->put_fpu(ctxt); - return rc; } @@ -5317,9 +5313,7 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) { int rc; - ctxt->ops->get_fpu(ctxt); rc = asm_safe("fwait"); - ctxt->ops->put_fpu(ctxt); if (unlikely(rc != X86EMUL_CONTINUE)) return emulate_exception(ctxt, MF_VECTOR, 0, false); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e5e66e5c664057bb5cc5ad2660008ccbf19b69e5..2b8eb4da4d0823bdcdf7bde1feb44132d0113cde 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); if(make_mmu_pages_available(vcpu) < 0) { spin_unlock(&vcpu->kvm->mmu_lock); - return 1; + return -ENOSPC; } sp = kvm_mmu_get_page(vcpu, 0, 0, vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); @@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); if (make_mmu_pages_available(vcpu) < 0) { spin_unlock(&vcpu->kvm->mmu_lock); - return 1; + return -ENOSPC; } sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); @@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); if (make_mmu_pages_available(vcpu) < 0) { spin_unlock(&vcpu->kvm->mmu_lock); - return 1; + return -ENOSPC; } sp = kvm_mmu_get_page(vcpu, root_gfn, 0, vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL); @@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); if (make_mmu_pages_available(vcpu) < 0) { spin_unlock(&vcpu->kvm->mmu_lock); - return 1; + return -ENOSPC; } sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, 0, ACC_ALL); @@ -3781,7 +3781,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) { if (unlikely(!lapic_in_kernel(vcpu) || - kvm_event_needs_reinjection(vcpu))) + kvm_event_needs_reinjection(vcpu) || + vcpu->arch.exception.pending)) return false; if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) @@ -5465,30 +5466,34 @@ static void mmu_destroy_caches(void) int kvm_mmu_module_init(void) { + int ret = -ENOMEM; + kvm_mmu_clear_all_pte_masks(); pte_list_desc_cache = kmem_cache_create("pte_list_desc", sizeof(struct pte_list_desc), 0, SLAB_ACCOUNT, NULL); if (!pte_list_desc_cache) - goto nomem; + goto out; mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", sizeof(struct kvm_mmu_page), 0, SLAB_ACCOUNT, NULL); if (!mmu_page_header_cache) - goto nomem; + goto out; if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) - goto nomem; + goto out; - register_shrinker(&mmu_shrinker); + ret = register_shrinker(&mmu_shrinker); + if (ret) + goto out; return 0; -nomem: +out: mmu_destroy_caches(); - return -ENOMEM; + return ret; } /* diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index eb714f1cdf7eee4ca9036005c3ab72ef9228ae9b..f40d0da1f1d321e4705f24ee85b80ad15233e438 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include "trace.h" @@ -361,7 +362,6 @@ static void recalc_intercepts(struct vcpu_svm *svm) { struct vmcb_control_area *c, *h; struct nested_state *g; - u32 h_intercept_exceptions; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); @@ -372,14 +372,9 @@ static void recalc_intercepts(struct vcpu_svm *svm) h = &svm->nested.hsave->control; g = &svm->nested; - /* No need to intercept #UD if L1 doesn't intercept it */ - h_intercept_exceptions = - h->intercept_exceptions & ~(1U << UD_VECTOR); - c->intercept_cr = h->intercept_cr | g->intercept_cr; c->intercept_dr = h->intercept_dr | g->intercept_dr; - c->intercept_exceptions = - h_intercept_exceptions | g->intercept_exceptions; + c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; c->intercept = h->intercept | g->intercept; } @@ -2202,7 +2197,6 @@ static int ud_interception(struct vcpu_svm *svm) { int er; - WARN_ON_ONCE(is_guest_mode(&svm->vcpu)); er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); if (er == EMULATE_USER_EXIT) return 0; @@ -4985,6 +4979,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%[svm]) \n\t" "mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t" +#endif + /* + * Clear host registers marked as clobbered to prevent + * speculative use. + */ + "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t" + "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t" + "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t" + "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t" + "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t" +#ifdef CONFIG_X86_64 + "xor %%r8, %%r8 \n\t" + "xor %%r9, %%r9 \n\t" + "xor %%r10, %%r10 \n\t" + "xor %%r11, %%r11 \n\t" + "xor %%r12, %%r12 \n\t" + "xor %%r13, %%r13 \n\t" + "xor %%r14, %%r14 \n\t" + "xor %%r15, %%r15 \n\t" #endif "pop %%" _ASM_BP : @@ -5015,6 +5028,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, svm->host.gs_base); #else diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4704aaf6d19e2ea36e3d12ddc0c345bc8ed3632f..c829d89e2e63f85bc36c6821b0ca1d7712297043 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "trace.h" #include "pmu.h" @@ -899,8 +900,16 @@ static inline short vmcs_field_to_offset(unsigned long field) { BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) || - vmcs_field_to_offset_table[field] == 0) + if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) + return -ENOENT; + + /* + * FIXME: Mitigation for CVE-2017-5753. To be replaced with a + * generic mechanism. + */ + asm("lfence"); + + if (vmcs_field_to_offset_table[field] == 0) return -ENOENT; return vmcs_field_to_offset_table[field]; @@ -1887,7 +1896,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; - eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) | + eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == @@ -1905,8 +1914,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) */ if (is_guest_mode(vcpu)) eb |= get_vmcs12(vcpu)->exception_bitmap; - else - eb |= 1u << UD_VECTOR; vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -2302,7 +2309,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * processors. See 22.2.4. */ vmcs_writel(HOST_TR_BASE, - (unsigned long)this_cpu_ptr(&cpu_tss)); + (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ /* @@ -5917,7 +5924,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) return 1; /* already handled by vmx_vcpu_run() */ if (is_invalid_opcode(intr_info)) { - WARN_ON_ONCE(is_guest_mode(vcpu)); er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); if (er == EMULATE_USER_EXIT) return 0; @@ -6751,16 +6757,10 @@ static __init int hardware_setup(void) goto out; } - vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); - /* - * Allow direct access to the PC debug port (it is often used for I/O - * delays, but the vmexits simply slow things down). - */ memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); - clear_bit(0x80, vmx_io_bitmap_a); memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); @@ -9421,6 +9421,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Save guest registers, load host registers, keep flags */ "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" "pop %0 \n\t" + "setbe %c[fail](%0)\n\t" "mov %%" _ASM_AX ", %c[rax](%0) \n\t" "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" __ASM_SIZE(pop) " %c[rcx](%0) \n\t" @@ -9437,12 +9438,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%0) \n\t" "mov %%r14, %c[r14](%0) \n\t" "mov %%r15, %c[r15](%0) \n\t" + "xor %%r8d, %%r8d \n\t" + "xor %%r9d, %%r9d \n\t" + "xor %%r10d, %%r10d \n\t" + "xor %%r11d, %%r11d \n\t" + "xor %%r12d, %%r12d \n\t" + "xor %%r13d, %%r13d \n\t" + "xor %%r14d, %%r14d \n\t" + "xor %%r15d, %%r15d \n\t" #endif "mov %%cr2, %%" _ASM_AX " \n\t" "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" + "xor %%eax, %%eax \n\t" + "xor %%ebx, %%ebx \n\t" + "xor %%esi, %%esi \n\t" + "xor %%edi, %%edi \n\t" "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" - "setbe %c[fail](%0) \n\t" ".pushsection .rodata \n\t" ".global vmx_return \n\t" "vmx_return: " _ASM_PTR " 2b \n\t" @@ -9479,6 +9491,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (debugctlmsr) update_debugctlmsr(debugctlmsr); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eee8e7faf1af5778763b8181df472651d3573149..1cec2c62a0b08405d2bd7c8908d6b7f33de3b63c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2937,7 +2937,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) srcu_read_unlock(&vcpu->kvm->srcu, idx); pagefault_enable(); kvm_x86_ops->vcpu_put(vcpu); - kvm_put_guest_fpu(vcpu); vcpu->arch.last_host_tsc = rdtsc(); } @@ -4385,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) addr, n, v)) && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) break; - trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v); handled += n; addr += n; len -= n; @@ -4644,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) { if (vcpu->mmio_read_completed) { trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, - vcpu->mmio_fragments[0].gpa, *(u64 *)val); + vcpu->mmio_fragments[0].gpa, val); vcpu->mmio_read_completed = 0; return 1; } @@ -4666,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) { - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val); return vcpu_mmio_write(vcpu, gpa, bytes, val); } static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, void *val, int bytes) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL); return X86EMUL_IO_NEEDED; } @@ -5252,17 +5251,6 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt) emul_to_vcpu(ctxt)->arch.halt_request = 1; } -static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) -{ - preempt_disable(); - kvm_load_guest_fpu(emul_to_vcpu(ctxt)); -} - -static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) -{ - preempt_enable(); -} - static int emulator_intercept(struct x86_emulate_ctxt *ctxt, struct x86_instruction_info *info, enum x86_intercept_stage stage) @@ -5340,8 +5328,6 @@ static const struct x86_emulate_ops emulate_ops = { .halt = emulator_halt, .wbinvd = emulator_wbinvd, .fix_hypercall = emulator_fix_hypercall, - .get_fpu = emulator_get_fpu, - .put_fpu = emulator_put_fpu, .intercept = emulator_intercept, .get_cpuid = emulator_get_cpuid, .set_nmi_mask = emulator_set_nmi_mask, @@ -6778,6 +6764,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) kvm_x86_ops->tlb_flush(vcpu); } +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) +{ + unsigned long apic_address; + + /* + * The physical address of apic access page is stored in the VMCS. + * Update it when it becomes invalid. + */ + apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (start <= apic_address && apic_address < end) + kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); +} + void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { struct page *page = NULL; @@ -6952,7 +6952,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) preempt_disable(); kvm_x86_ops->prepare_guest_switch(vcpu); - kvm_load_guest_fpu(vcpu); /* * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt @@ -7265,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - struct fpu *fpu = ¤t->thread.fpu; int r; - fpu__initialize(fpu); - kvm_sigset_activate(vcpu); + kvm_load_guest_fpu(vcpu); + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (kvm_run->immediate_exit) { r = -EINTR; @@ -7312,6 +7310,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) r = vcpu_run(vcpu); out: + kvm_put_guest_fpu(vcpu); post_kvm_run_save(vcpu); kvm_sigset_deactivate(vcpu); @@ -7381,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) #endif kvm_rip_write(vcpu, regs->rip); - kvm_set_rflags(vcpu, regs->rflags); + kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED); vcpu->arch.exception.pending = false; @@ -7495,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, } EXPORT_SYMBOL_GPL(kvm_task_switch); +int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) { + /* + * When EFER.LME and CR0.PG are set, the processor is in + * 64-bit mode (though maybe in a 32-bit code segment). + * CR4.PAE and EFER.LMA must be set. + */ + if (!(sregs->cr4 & X86_CR4_PAE_BIT) + || !(sregs->efer & EFER_LMA)) + return -EINVAL; + } else { + /* + * Not in 64-bit mode: EFER.LMA is clear and the code + * segment cannot be 64-bit. + */ + if (sregs->efer & EFER_LMA || sregs->cs.l) + return -EINVAL; + } + + return 0; +} + int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { @@ -7507,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, (sregs->cr4 & X86_CR4_OSXSAVE)) return -EINVAL; + if (kvm_valid_sregs(vcpu, sregs)) + return -EINVAL; + apic_base_msr.data = sregs->apic_base; apic_base_msr.host_initiated = true; if (kvm_set_apic_base(vcpu, &apic_base_msr)) @@ -7704,32 +7729,25 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } +/* Swap (qemu) user FPU context for the guest FPU context. */ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { - if (vcpu->guest_fpu_loaded) - return; - - /* - * Restore all possible states in the guest, - * and assume host would use all available bits. - * Guest xcr0 would be loaded later. - */ - vcpu->guest_fpu_loaded = 1; - __kernel_fpu_begin(); + preempt_disable(); + copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, ~XFEATURE_MASK_PKRU); + preempt_enable(); trace_kvm_fpu(1); } +/* When vcpu_run ends, restore user space FPU context. */ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - if (!vcpu->guest_fpu_loaded) - return; - - vcpu->guest_fpu_loaded = 0; + preempt_disable(); copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); - __kernel_fpu_end(); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); + preempt_enable(); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); } @@ -7846,7 +7864,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) * To avoid have the INIT path from kvm_apic_has_events() that be * called with loaded FPU and does not let userspace fix the state. */ - kvm_put_guest_fpu(vcpu); + if (init_event) + kvm_put_guest_fpu(vcpu); mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, XFEATURE_MASK_BNDREGS); if (mpx_state_buffer) @@ -7855,6 +7874,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) XFEATURE_MASK_BNDCSR); if (mpx_state_buffer) memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); + if (init_event) + kvm_load_guest_fpu(vcpu); } if (!init_event) { diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 7b181b61170e769fc41a062a3eddbb62c4e53793..f23934bbaf4ebe6a55331669160d6b0aa72ba5d4 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o +lib-$(CONFIG_RETPOLINE) += retpoline.o obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 4d34bb548b41ebdddc20fcf464aad4cb44c6a763..46e71a74e6129b861c233ccf86f452b485e62d59 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -29,7 +29,8 @@ #include #include #include - +#include + /* * computes a partial checksum, e.g. for TCP/UDP fragments */ @@ -156,7 +157,7 @@ ENTRY(csum_partial) negl %ebx lea 45f(%ebx,%ebx,2), %ebx testl %esi, %esi - jmp *%ebx + JMP_NOSPEC %ebx # Handle 2-byte-aligned regions 20: addw (%esi), %ax @@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic) andl $-32,%edx lea 3f(%ebx,%ebx), %ebx testl %esi, %esi - jmp *%ebx + JMP_NOSPEC %ebx 1: addl $64,%esi addl $64,%edi SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 553f8fd23cc4733d0edafa862b95446f7a04bab1..4846eff7e4c8b1505501d7f1dcb64127d0a4c67c 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops) delay = min_t(u64, MWAITX_MAX_LOOPS, loops); /* - * Use cpu_tss as a cacheline-aligned, seldomly + * Use cpu_tss_rw as a cacheline-aligned, seldomly * accessed per-cpu variable as the monitor target. */ - __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); + __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); /* * AMD, like Intel, supports the EAX hint and EAX=0xf diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S new file mode 100644 index 0000000000000000000000000000000000000000..cb45c6cb465f4b1ddea6b1a36a26f8ed3abd79db --- /dev/null +++ b/arch/x86/lib/retpoline.S @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include + +.macro THUNK reg + .section .text.__x86.indirect_thunk.\reg + +ENTRY(__x86_indirect_thunk_\reg) + CFI_STARTPROC + JMP_NOSPEC %\reg + CFI_ENDPROC +ENDPROC(__x86_indirect_thunk_\reg) +.endm + +/* + * Despite being an assembler file we can't just use .irp here + * because __KSYM_DEPS__ only uses the C preprocessor and would + * only see one instance of "__x86_indirect_thunk_\reg" rather + * than one per register with the correct names. So we do it + * the simple and nasty way... + */ +#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg) +#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) + +GENERATE_THUNK(_ASM_AX) +GENERATE_THUNK(_ASM_BX) +GENERATE_THUNK(_ASM_CX) +GENERATE_THUNK(_ASM_DX) +GENERATE_THUNK(_ASM_SI) +GENERATE_THUNK(_ASM_DI) +GENERATE_THUNK(_ASM_BP) +GENERATE_THUNK(_ASM_SP) +#ifdef CONFIG_64BIT +GENERATE_THUNK(r8) +GENERATE_THUNK(r9) +GENERATE_THUNK(r10) +GENERATE_THUNK(r11) +GENERATE_THUNK(r12) +GENERATE_THUNK(r13) +GENERATE_THUNK(r14) +GENERATE_THUNK(r15) +#endif diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index c4d55919fac19e06afbb00a4124fbf1b334b4d46..e0b85930dd773e87417e2b4957b8af61221b04c0 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) -ff: +ff: UD0 EndTable Table: 3-byte opcode 1 (0x0f 0x38) @@ -717,7 +717,7 @@ AVXcode: 2 7e: vpermt2d/q Vx,Hx,Wx (66),(ev) 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 80: INVEPT Gy,Mdq (66) -81: INVPID Gy,Mdq (66) +81: INVVPID Gy,Mdq (66) 82: INVPCID Gy,Mdq (66) 83: vpmultishiftqb Vx,Hx,Wx (66),(ev) 88: vexpandps/d Vpd,Wpd (66),(ev) @@ -970,6 +970,15 @@ GrpTable: Grp9 EndTable GrpTable: Grp10 +# all are UD1 +0: UD1 +1: UD1 +2: UD1 +3: UD1 +4: UD1 +5: UD1 +6: UD1 +7: UD1 EndTable # Grp11A and Grp11B are expressed as Grp11 in Intel SDM diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 8e13b8cc6bedb0dc84eea64cd80ca6ae39037eaa..27e9e90a8d3572b900ccaacae1623de803072b17 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg endif obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o setup_nx.o tlb.o + pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) @@ -41,9 +41,10 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o -obj-$(CONFIG_X86_INTEL_MPX) += mpx.o -obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o -obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +obj-$(CONFIG_X86_INTEL_MPX) += mpx.o +obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o +obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o +obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c new file mode 100644 index 0000000000000000000000000000000000000000..b9283cc276220db667ab091a3358eb5741813f7f --- /dev/null +++ b/arch/x86/mm/cpu_entry_area.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +#include +#include +#include +#include + +static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); + +#ifdef CONFIG_X86_64 +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); +#endif + +struct cpu_entry_area *get_cpu_entry_area(int cpu) +{ + unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE; + BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); + + return (struct cpu_entry_area *) va; +} +EXPORT_SYMBOL(get_cpu_entry_area); + +void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) +{ + unsigned long va = (unsigned long) cea_vaddr; + + set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags)); +} + +static void __init +cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) +{ + for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) + cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); +} + +static void percpu_setup_debug_store(int cpu) +{ +#ifdef CONFIG_CPU_SUP_INTEL + int npages; + void *cea; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return; + + cea = &get_cpu_entry_area(cpu)->cpu_debug_store; + npages = sizeof(struct debug_store) / PAGE_SIZE; + BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0); + cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages, + PAGE_KERNEL); + + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers; + /* + * Force the population of PMDs for not yet allocated per cpu + * memory like debug store buffers. + */ + npages = sizeof(struct debug_store_buffers) / PAGE_SIZE; + for (; npages; npages--, cea += PAGE_SIZE) + cea_set_pte(cea, 0, PAGE_NONE); +#endif +} + +/* Setup the fixmap mappings only once per-processor */ +static void __init setup_cpu_entry_area(int cpu) +{ +#ifdef CONFIG_X86_64 + extern char _entry_trampoline[]; + + /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ + pgprot_t gdt_prot = PAGE_KERNEL_RO; + pgprot_t tss_prot = PAGE_KERNEL_RO; +#else + /* + * On native 32-bit systems, the GDT cannot be read-only because + * our double fault handler uses a task gate, and entering through + * a task gate needs to change an available TSS to busy. If the + * GDT is read-only, that will triple fault. The TSS cannot be + * read-only because the CPU writes to it on task switches. + * + * On Xen PV, the GDT must be read-only because the hypervisor + * requires it. + */ + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? + PAGE_KERNEL_RO : PAGE_KERNEL; + pgprot_t tss_prot = PAGE_KERNEL; +#endif + + cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu), + gdt_prot); + + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page, + per_cpu_ptr(&entry_stack_storage, cpu), 1, + PAGE_KERNEL); + + /* + * The Intel SDM says (Volume 3, 7.2.1): + * + * Avoid placing a page boundary in the part of the TSS that the + * processor reads during a task switch (the first 104 bytes). The + * processor may not correctly perform address translations if a + * boundary occurs in this area. During a task switch, the processor + * reads and writes into the first 104 bytes of each TSS (using + * contiguous physical addresses beginning with the physical address + * of the first byte of the TSS). So, after TSS access begins, if + * part of the 104 bytes is not physically contiguous, the processor + * will access incorrect information without generating a page-fault + * exception. + * + * There are also a lot of errata involving the TSS spanning a page + * boundary. Assert that we're not doing that. + */ + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss, + &per_cpu(cpu_tss_rw, cpu), + sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); + +#ifdef CONFIG_X86_32 + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); +#endif + +#ifdef CONFIG_X86_64 + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); + BUILD_BUG_ON(sizeof(exception_stacks) != + sizeof(((struct cpu_entry_area *)0)->exception_stacks)); + cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks, + &per_cpu(exception_stacks, cpu), + sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); + + cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline, + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); +#endif + percpu_setup_debug_store(cpu); +} + +static __init void setup_cpu_entry_area_ptes(void) +{ +#ifdef CONFIG_X86_32 + unsigned long start, end; + + BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE); + BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); + + start = CPU_ENTRY_AREA_BASE; + end = start + CPU_ENTRY_AREA_MAP_SIZE; + + /* Careful here: start + PMD_SIZE might wrap around */ + for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE) + populate_extra_pte(start); +#endif +} + +void __init setup_cpu_entry_areas(void) +{ + unsigned int cpu; + + setup_cpu_entry_area_ptes(); + + for_each_possible_cpu(cpu) + setup_cpu_entry_area(cpu); +} diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index bfcffdf6c5775f7ac5bd4c9f768573d7ae7bba55..421f2664ffa06e6cd4b31e5d6521648add3e2c4e 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -5,7 +5,7 @@ static int ptdump_show(struct seq_file *m, void *v) { - ptdump_walk_pgd_level(m, NULL); + ptdump_walk_pgd_level_debugfs(m, NULL, false); return 0; } @@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = { .release = single_release, }; -static struct dentry *pe; +static int ptdump_show_curknl(struct seq_file *m, void *v) +{ + if (current->mm->pgd) { + down_read(¤t->mm->mmap_sem); + ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false); + up_read(¤t->mm->mmap_sem); + } + return 0; +} + +static int ptdump_open_curknl(struct inode *inode, struct file *filp) +{ + return single_open(filp, ptdump_show_curknl, NULL); +} + +static const struct file_operations ptdump_curknl_fops = { + .owner = THIS_MODULE, + .open = ptdump_open_curknl, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#ifdef CONFIG_PAGE_TABLE_ISOLATION +static struct dentry *pe_curusr; + +static int ptdump_show_curusr(struct seq_file *m, void *v) +{ + if (current->mm->pgd) { + down_read(¤t->mm->mmap_sem); + ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true); + up_read(¤t->mm->mmap_sem); + } + return 0; +} + +static int ptdump_open_curusr(struct inode *inode, struct file *filp) +{ + return single_open(filp, ptdump_show_curusr, NULL); +} + +static const struct file_operations ptdump_curusr_fops = { + .owner = THIS_MODULE, + .open = ptdump_open_curusr, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + +static struct dentry *dir, *pe_knl, *pe_curknl; static int __init pt_dump_debug_init(void) { - pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL, - &ptdump_fops); - if (!pe) + dir = debugfs_create_dir("page_tables", NULL); + if (!dir) return -ENOMEM; + pe_knl = debugfs_create_file("kernel", 0400, dir, NULL, + &ptdump_fops); + if (!pe_knl) + goto err; + + pe_curknl = debugfs_create_file("current_kernel", 0400, + dir, NULL, &ptdump_curknl_fops); + if (!pe_curknl) + goto err; + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + pe_curusr = debugfs_create_file("current_user", 0400, + dir, NULL, &ptdump_curusr_fops); + if (!pe_curusr) + goto err; +#endif return 0; +err: + debugfs_remove_recursive(dir); + return -ENOMEM; } static void __exit pt_dump_debug_exit(void) { - debugfs_remove_recursive(pe); + debugfs_remove_recursive(dir); } module_init(pt_dump_debug_init); diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 5e3ac6fe6c9e32ed1906f4f9bf736310a7193c7d..2a4849e92831b0f9771a2742b01c5a6c9c806a42 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -44,68 +44,97 @@ struct addr_marker { unsigned long max_lines; }; -/* indices for address_markers; keep sync'd w/ address_markers below */ +/* Address space markers hints */ + +#ifdef CONFIG_X86_64 + enum address_markers_idx { USER_SPACE_NR = 0, -#ifdef CONFIG_X86_64 KERNEL_SPACE_NR, LOW_KERNEL_NR, +#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL) + LDT_NR, +#endif VMALLOC_START_NR, VMEMMAP_START_NR, #ifdef CONFIG_KASAN KASAN_SHADOW_START_NR, KASAN_SHADOW_END_NR, #endif -# ifdef CONFIG_X86_ESPFIX64 + CPU_ENTRY_AREA_NR, +#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL) + LDT_NR, +#endif +#ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, -# endif +#endif +#ifdef CONFIG_EFI + EFI_END_NR, +#endif HIGH_KERNEL_NR, MODULES_VADDR_NR, MODULES_END_NR, -#else + FIXADDR_START_NR, + END_OF_SPACE_NR, +}; + +static struct addr_marker address_markers[] = { + [USER_SPACE_NR] = { 0, "User Space" }, + [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" }, + [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" }, + [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, + [VMEMMAP_START_NR] = { 0UL, "Vmemmap" }, +#ifdef CONFIG_KASAN + [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" }, + [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, +#endif +#ifdef CONFIG_MODIFY_LDT_SYSCALL + [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" }, +#endif + [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, +#ifdef CONFIG_X86_ESPFIX64 + [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, +#endif +#ifdef CONFIG_EFI + [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" }, +#endif + [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" }, + [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" }, + [MODULES_END_NR] = { MODULES_END, "End Modules" }, + [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" }, + [END_OF_SPACE_NR] = { -1, NULL } +}; + +#else /* CONFIG_X86_64 */ + +enum address_markers_idx { + USER_SPACE_NR = 0, KERNEL_SPACE_NR, VMALLOC_START_NR, VMALLOC_END_NR, -# ifdef CONFIG_HIGHMEM +#ifdef CONFIG_HIGHMEM PKMAP_BASE_NR, -# endif - FIXADDR_START_NR, #endif + CPU_ENTRY_AREA_NR, + FIXADDR_START_NR, + END_OF_SPACE_NR, }; -/* Address space markers hints */ static struct addr_marker address_markers[] = { - { 0, "User Space" }, -#ifdef CONFIG_X86_64 - { 0x8000000000000000UL, "Kernel Space" }, - { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, - { 0/* VMALLOC_START */, "vmalloc() Area" }, - { 0/* VMEMMAP_START */, "Vmemmap" }, -#ifdef CONFIG_KASAN - { KASAN_SHADOW_START, "KASAN shadow" }, - { KASAN_SHADOW_END, "KASAN shadow end" }, + [USER_SPACE_NR] = { 0, "User Space" }, + [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" }, + [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, + [VMALLOC_END_NR] = { 0UL, "vmalloc() End" }, +#ifdef CONFIG_HIGHMEM + [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" }, #endif -# ifdef CONFIG_X86_ESPFIX64 - { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, -# endif -# ifdef CONFIG_EFI - { EFI_VA_END, "EFI Runtime Services" }, -# endif - { __START_KERNEL_map, "High Kernel Mapping" }, - { MODULES_VADDR, "Modules" }, - { MODULES_END, "End Modules" }, -#else - { PAGE_OFFSET, "Kernel Mapping" }, - { 0/* VMALLOC_START */, "vmalloc() Area" }, - { 0/*VMALLOC_END*/, "vmalloc() End" }, -# ifdef CONFIG_HIGHMEM - { 0/*PKMAP_BASE*/, "Persistent kmap() Area" }, -# endif - { 0/*FIXADDR_START*/, "Fixmap Area" }, -#endif - { -1, NULL } /* End of list */ + [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" }, + [FIXADDR_START_NR] = { 0UL, "Fixmap area" }, + [END_OF_SPACE_NR] = { -1, NULL } }; +#endif /* !CONFIG_X86_64 */ + /* Multipliers for offsets within the PTEs */ #define PTE_LEVEL_MULT (PAGE_SIZE) #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) @@ -140,7 +169,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) static const char * const level_name[] = { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; - if (!pgprot_val(prot)) { + if (!(pr & _PAGE_PRESENT)) { /* Not present */ pt_dump_cont_printf(m, dmsg, " "); } else { @@ -447,7 +476,7 @@ static inline bool is_hypervisor_range(int idx) } static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, - bool checkwx) + bool checkwx, bool dmesg) { #ifdef CONFIG_X86_64 pgd_t *start = (pgd_t *) &init_top_pgt; @@ -460,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, if (pgd) { start = pgd; - st.to_dmesg = true; + st.to_dmesg = dmesg; } st.check_wx = checkwx; @@ -498,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) { - ptdump_walk_pgd_level_core(m, pgd, false); + ptdump_walk_pgd_level_core(m, pgd, false, true); +} + +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + if (user && static_cpu_has(X86_FEATURE_PTI)) + pgd = kernel_to_user_pgdp(pgd); +#endif + ptdump_walk_pgd_level_core(m, pgd, false, false); +} +EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); + +static void ptdump_walk_user_pgd_level_checkwx(void) +{ +#ifdef CONFIG_PAGE_TABLE_ISOLATION + pgd_t *pgd = (pgd_t *) &init_top_pgt; + + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + pr_info("x86/mm: Checking user space page tables\n"); + pgd = kernel_to_user_pgdp(pgd); + ptdump_walk_pgd_level_core(NULL, pgd, true, false); +#endif } -EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level); void ptdump_walk_pgd_level_checkwx(void) { - ptdump_walk_pgd_level_core(NULL, NULL, true); + ptdump_walk_pgd_level_core(NULL, NULL, true, false); + ptdump_walk_user_pgd_level_checkwx(); } static int __init pt_dump_init(void) @@ -525,8 +578,8 @@ static int __init pt_dump_init(void) address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; # endif address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; + address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE; #endif - return 0; } __initcall(pt_dump_init); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 3321b446b66cdb99f16fe145bf9bad50f9d1fd01..9fe656c42aa5b16560e139cebba247ca52756c80 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -82,7 +83,7 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup, return true; } -EXPORT_SYMBOL_GPL(ex_handler_refcount); +EXPORT_SYMBOL(ex_handler_refcount); /* * Handler for when we fail to restore a task's FPU state. We should never get @@ -212,8 +213,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr) * Old CPUs leave the high bits of CS on the stack * undefined. I'm not sure which CPUs do this, but at least * the 486 DX works this way. + * Xen pv domains are not using the default __KERNEL_CS. */ - if (regs->cs != __KERNEL_CS) + if (!xen_pv_domain() && regs->cs != __KERNEL_CS) goto fail; /* diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 78ca9a8ee4548a270045e81841ef6380ed6d260a..06fe3d51d385b88111961c0b5addc673fcd597a2 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -701,7 +701,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, else printk(KERN_CONT "paging request"); - printk(KERN_CONT " at %p\n", (void *) address); + printk(KERN_CONT " at %px\n", (void *) address); printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip); dump_pagetable(address); @@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx", + printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, (void *)regs->ip, (void *)regs->sp, error_code); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6fdf91ef130a4737ab434ce98f77b2583fe1840d..82f5252c723a4a544593067981d90191c7b22c1d 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -20,6 +20,7 @@ #include #include #include +#include /* * We need to define the tracepoints somewhere, and tlb.c @@ -160,6 +161,12 @@ struct map_range { static int page_size_mask; +static void enable_global_pages(void) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + __supported_pte_mask |= _PAGE_GLOBAL; +} + static void __init probe_page_size_mask(void) { /* @@ -177,11 +184,11 @@ static void __init probe_page_size_mask(void) cr4_set_bits_and_update_boot(X86_CR4_PSE); /* Enable PGE if available */ + __supported_pte_mask &= ~_PAGE_GLOBAL; if (boot_cpu_has(X86_FEATURE_PGE)) { cr4_set_bits_and_update_boot(X86_CR4_PGE); - __supported_pte_mask |= _PAGE_GLOBAL; - } else - __supported_pte_mask &= ~_PAGE_GLOBAL; + enable_global_pages(); + } /* Enable 1 GB linear kernel mappings if available: */ if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) { @@ -194,34 +201,44 @@ static void __init probe_page_size_mask(void) static void setup_pcid(void) { -#ifdef CONFIG_X86_64 - if (boot_cpu_has(X86_FEATURE_PCID)) { - if (boot_cpu_has(X86_FEATURE_PGE)) { - /* - * This can't be cr4_set_bits_and_update_boot() -- - * the trampoline code can't handle CR4.PCIDE and - * it wouldn't do any good anyway. Despite the name, - * cr4_set_bits_and_update_boot() doesn't actually - * cause the bits in question to remain set all the - * way through the secondary boot asm. - * - * Instead, we brute-force it and set CR4.PCIDE - * manually in start_secondary(). - */ - cr4_set_bits(X86_CR4_PCIDE); - } else { - /* - * flush_tlb_all(), as currently implemented, won't - * work if PCID is on but PGE is not. Since that - * combination doesn't exist on real hardware, there's - * no reason to try to fully support it, but it's - * polite to avoid corrupting data if we're on - * an improperly configured VM. - */ - setup_clear_cpu_cap(X86_FEATURE_PCID); - } + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + if (!boot_cpu_has(X86_FEATURE_PCID)) + return; + + if (boot_cpu_has(X86_FEATURE_PGE)) { + /* + * This can't be cr4_set_bits_and_update_boot() -- the + * trampoline code can't handle CR4.PCIDE and it wouldn't + * do any good anyway. Despite the name, + * cr4_set_bits_and_update_boot() doesn't actually cause + * the bits in question to remain set all the way through + * the secondary boot asm. + * + * Instead, we brute-force it and set CR4.PCIDE manually in + * start_secondary(). + */ + cr4_set_bits(X86_CR4_PCIDE); + + /* + * INVPCID's single-context modes (2/3) only work if we set + * X86_CR4_PCIDE, *and* we INVPCID support. It's unusable + * on systems that have X86_CR4_PCIDE clear, or that have + * no INVPCID support at all. + */ + if (boot_cpu_has(X86_FEATURE_INVPCID)) + setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE); + } else { + /* + * flush_tlb_all(), as currently implemented, won't work if + * PCID is on but PGE is not. Since that combination + * doesn't exist on real hardware, there's no reason to try + * to fully support it, but it's polite to avoid corrupting + * data if we're on an improperly configured VM. + */ + setup_clear_cpu_cap(X86_FEATURE_PCID); } -#endif } #ifdef CONFIG_X86_32 @@ -622,6 +639,7 @@ void __init init_mem_mapping(void) { unsigned long end; + pti_check_boottime_disable(); probe_page_size_mask(); setup_pcid(); @@ -845,12 +863,12 @@ void __init zone_sizes_init(void) free_area_init_nodes(max_zone_pfns); } -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { +__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { .loaded_mm = &init_mm, .next_asid = 1, .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ }; -EXPORT_SYMBOL_GPL(cpu_tlbstate); +EXPORT_PER_CPU_SYMBOL(cpu_tlbstate); void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) { diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8a64a6f2848d9be2e73a341f4d87ab2dc35de09f..135c9a7898c7da908f1340f9750774b4327e63b3 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include "mm_internal.h" @@ -766,6 +767,7 @@ void __init mem_init(void) mem_init_print_info(NULL); printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" + " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n" #ifdef CONFIG_HIGHMEM " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" #endif @@ -777,6 +779,10 @@ void __init mem_init(void) FIXADDR_START, FIXADDR_TOP, (FIXADDR_TOP - FIXADDR_START) >> 10, + CPU_ENTRY_AREA_BASE, + CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE, + CPU_ENTRY_AREA_MAP_SIZE >> 10, + #ifdef CONFIG_HIGHMEM PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, (LAST_PKMAP*PAGE_SIZE) >> 10, diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 6e4573b1da341bd41095b11a692afcba51e4b850..c45b6ec5357bcd2e9f6626bd738c700cccd0a173 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -404,11 +404,11 @@ void iounmap(volatile void __iomem *addr) return; } + mmiotrace_iounmap(addr); + addr = (volatile void __iomem *) (PAGE_MASK & (unsigned long __force)addr); - mmiotrace_iounmap(addr); - /* Use the vm area unlocked, assuming the caller ensures there isn't another iounmap for the same address in parallel. Reuse of the virtual address is prevented by diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 99dfed6dfef8b2f9028f82b89ab8dc2bde8173c4..47388f0c0e59649ca3574d4e7c31b356dad7d247 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -15,6 +15,7 @@ #include #include #include +#include extern struct range pfn_mapped[E820_MAX_ENTRIES]; @@ -277,6 +278,7 @@ void __init kasan_early_init(void) void __init kasan_init(void) { int i; + void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; #ifdef CONFIG_KASAN_INLINE register_die_notifier(&kasan_die_notifier); @@ -321,16 +323,33 @@ void __init kasan_init(void) map_range(&pfn_mapped[i]); } + shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE; + shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); + shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin, + PAGE_SIZE); + + shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE + + CPU_ENTRY_AREA_MAP_SIZE); + shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); + shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end, + PAGE_SIZE); + kasan_populate_zero_shadow( kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), - kasan_mem_to_shadow((void *)__START_KERNEL_map)); + shadow_cpu_entry_begin); + + kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, + (unsigned long)shadow_cpu_entry_end, 0); + + kasan_populate_zero_shadow(shadow_cpu_entry_end, + kasan_mem_to_shadow((void *)__START_KERNEL_map)); kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), (unsigned long)kasan_mem_to_shadow(_end), early_pfn_to_nid(__pa(_stext))); kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), - (void *)KASAN_SHADOW_END); + (void *)KASAN_SHADOW_END); load_cr3(init_top_pgt); __flush_tlb_all(); diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 879ef930e2c2b04d60c85efec9f6e0fa5555fa2a..aedebd2ebf1ead0ff2b1b17816ff80d9fce88b8f 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -34,25 +34,14 @@ #define TB_SHIFT 40 /* - * Virtual address start and end range for randomization. The end changes base - * on configuration to have the highest amount of space for randomization. - * It increases the possible random position for each randomized region. + * Virtual address start and end range for randomization. * - * You need to add an if/def entry if you introduce a new memory region - * compatible with KASLR. Your entry must be in logical order with memory - * layout. For example, ESPFIX is before EFI because its virtual address is - * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to - * ensure that this order is correct and won't be changed. + * The end address could depend on more configuration options to make the + * highest amount of space for randomization available, but that's too hard + * to keep straight and caused issues already. */ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; - -#if defined(CONFIG_X86_ESPFIX64) -static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; -#elif defined(CONFIG_EFI) -static const unsigned long vaddr_end = EFI_VA_END; -#else -static const unsigned long vaddr_end = __START_KERNEL_map; -#endif +static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; /* Default values */ unsigned long page_offset_base = __PAGE_OFFSET_BASE; @@ -101,15 +90,12 @@ void __init kernel_randomize_memory(void) unsigned long remain_entropy; /* - * All these BUILD_BUG_ON checks ensures the memory layout is - * consistent with the vaddr_start/vaddr_end variables. + * These BUILD_BUG_ON checks ensure the memory layout is consistent + * with the vaddr_start/vaddr_end variables. These checks are very + * limited.... */ BUILD_BUG_ON(vaddr_start >= vaddr_end); - BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && - vaddr_end >= EFI_VA_END); - BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || - IS_ENABLED(CONFIG_EFI)) && - vaddr_end >= __START_KERNEL_map); + BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE); BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); if (!kaslr_memory_enabled()) diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c deleted file mode 100644 index cec5940325151e407aa90128a35cb683afd436d7..0000000000000000000000000000000000000000 --- a/arch/x86/mm/kmemcheck/error.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h deleted file mode 100644 index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000 --- a/arch/x86/mm/kmemcheck/error.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c deleted file mode 100644 index cec5940325151e407aa90128a35cb683afd436d7..0000000000000000000000000000000000000000 --- a/arch/x86/mm/kmemcheck/opcode.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h deleted file mode 100644 index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000 --- a/arch/x86/mm/kmemcheck/opcode.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c deleted file mode 100644 index cec5940325151e407aa90128a35cb683afd436d7..0000000000000000000000000000000000000000 --- a/arch/x86/mm/kmemcheck/pte.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h deleted file mode 100644 index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000 --- a/arch/x86/mm/kmemcheck/pte.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c deleted file mode 100644 index cec5940325151e407aa90128a35cb683afd436d7..0000000000000000000000000000000000000000 --- a/arch/x86/mm/kmemcheck/selftest.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h deleted file mode 100644 index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000 --- a/arch/x86/mm/kmemcheck/selftest.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h deleted file mode 100644 index ea32a7d3cf1bc87963b259a9902042a7c33e41e4..0000000000000000000000000000000000000000 --- a/arch/x86/mm/kmemcheck/shadow.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index c21c2ed046120c8e12d439e71848200f11c41bd5..58477ec3d66d08acf07c1bc21bb9a55a78fcaa28 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -435,17 +435,18 @@ int register_kmmio_probe(struct kmmio_probe *p) unsigned long flags; int ret = 0; unsigned long size = 0; + unsigned long addr = p->addr & PAGE_MASK; const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); unsigned int l; pte_t *pte; spin_lock_irqsave(&kmmio_lock, flags); - if (get_kmmio_probe(p->addr)) { + if (get_kmmio_probe(addr)) { ret = -EEXIST; goto out; } - pte = lookup_address(p->addr, &l); + pte = lookup_address(addr, &l); if (!pte) { ret = -EINVAL; goto out; @@ -454,7 +455,7 @@ int register_kmmio_probe(struct kmmio_probe *p) kmmio_count++; list_add_rcu(&p->list, &kmmio_probes); while (size < size_lim) { - if (add_kmmio_fault_page(p->addr + size)) + if (add_kmmio_fault_page(addr + size)) pr_err("Unable to set page fault.\n"); size += page_level_size(l); } @@ -528,19 +529,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p) { unsigned long flags; unsigned long size = 0; + unsigned long addr = p->addr & PAGE_MASK; const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); struct kmmio_fault_page *release_list = NULL; struct kmmio_delayed_release *drelease; unsigned int l; pte_t *pte; - pte = lookup_address(p->addr, &l); + pte = lookup_address(addr, &l); if (!pte) return; spin_lock_irqsave(&kmmio_lock, flags); while (size < size_lim) { - release_kmmio_fault_page(p->addr + size, &release_list); + release_kmmio_fault_page(addr + size, &release_list); size += page_level_size(l); } list_del_rcu(&p->list); diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index d9a9e9fc75dd7b511050adc6bd4c914b0c4fcdaa..391b13402e403041222ef8e6bd22f60edaeb9446 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -405,13 +405,13 @@ bool sme_active(void) { return sme_me_mask && !sev_enabled; } -EXPORT_SYMBOL_GPL(sme_active); +EXPORT_SYMBOL(sme_active); bool sev_active(void) { return sme_me_mask && sev_enabled; } -EXPORT_SYMBOL_GPL(sev_active); +EXPORT_SYMBOL(sev_active); static const struct dma_map_ops sev_dma_ops = { .alloc = sev_alloc, diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 96d456a94b0342eb967f918e4233498ac24e4349..004abf9ebf1222c169448090f7f1c570635bce41 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } #else + static inline pgd_t *_pgd_alloc(void) { - return (pgd_t *)__get_free_page(PGALLOC_GFP); + return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); } static inline void _pgd_free(pgd_t *pgd) { - free_page((unsigned long)pgd); + free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); } #endif /* CONFIG_X86_PAE */ diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 6b9bf023a700559b87ae7ac89570d9bbd26d1f05..c3c5274410a908e762aed936406006d63c3116ac 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c new file mode 100644 index 0000000000000000000000000000000000000000..ce38f165489b5a13d92091c8671879f30ce44e20 --- /dev/null +++ b/arch/x86/mm/pti.c @@ -0,0 +1,368 @@ +/* + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This code is based in part on work published here: + * + * https://github.com/IAIK/KAISER + * + * The original work was written by and and signed off by for the Linux + * kernel by: + * + * Signed-off-by: Richard Fellner + * Signed-off-by: Moritz Lipp + * Signed-off-by: Daniel Gruss + * Signed-off-by: Michael Schwarz + * + * Major changes to the original code by: Dave Hansen + * Mostly rewritten by Thomas Gleixner and + * Andy Lutomirsky + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef pr_fmt +#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt + +/* Backporting helper */ +#ifndef __GFP_NOTRACK +#define __GFP_NOTRACK 0 +#endif + +static void __init pti_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + pr_info("%s\n", reason); +} + +static void __init pti_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + pr_info("%s\n", reason); +} + +void __init pti_check_boottime_disable(void) +{ + char arg[5]; + int ret; + + if (hypervisor_is_type(X86_HYPER_XEN_PV)) { + pti_print_if_insecure("disabled on XEN PV."); + return; + } + + ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); + if (ret > 0) { + if (ret == 3 && !strncmp(arg, "off", 3)) { + pti_print_if_insecure("disabled on command line."); + return; + } + if (ret == 2 && !strncmp(arg, "on", 2)) { + pti_print_if_secure("force enabled on command line."); + goto enable; + } + if (ret == 4 && !strncmp(arg, "auto", 4)) + goto autosel; + } + + if (cmdline_find_option_bool(boot_command_line, "nopti")) { + pti_print_if_insecure("disabled on command line."); + return; + } + +autosel: + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + return; +enable: + setup_force_cpu_cap(X86_FEATURE_PTI); +} + +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +{ + /* + * Changes to the high (kernel) portion of the kernelmode page + * tables are not automatically propagated to the usermode tables. + * + * Users should keep in mind that, unlike the kernelmode tables, + * there is no vmalloc_fault equivalent for the usermode tables. + * Top-level entries added to init_mm's usermode pgd after boot + * will not be automatically propagated to other mms. + */ + if (!pgdp_maps_userspace(pgdp)) + return pgd; + + /* + * The user page tables get the full PGD, accessible from + * userspace: + */ + kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd; + + /* + * If this is normal user memory, make it NX in the kernel + * pagetables so that, if we somehow screw up and return to + * usermode with the kernel CR3 loaded, we'll get a page fault + * instead of allowing user code to execute with the wrong CR3. + * + * As exceptions, we don't set NX if: + * - _PAGE_USER is not set. This could be an executable + * EFI runtime mapping or something similar, and the kernel + * may execute from it + * - we don't have NX support + * - we're clearing the PGD (i.e. the new pgd is not present). + */ + if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) && + (__supported_pte_mask & _PAGE_NX)) + pgd.pgd |= _PAGE_NX; + + /* return the copy of the PGD we want the kernel to use: */ + return pgd; +} + +/* + * Walk the user copy of the page tables (optionally) trying to allocate + * page table pages on the way down. + * + * Returns a pointer to a P4D on success, or NULL on failure. + */ +static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) +{ + pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address)); + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + + if (address < PAGE_OFFSET) { + WARN_ONCE(1, "attempt to walk user address\n"); + return NULL; + } + + if (pgd_none(*pgd)) { + unsigned long new_p4d_page = __get_free_page(gfp); + if (!new_p4d_page) + return NULL; + + set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); + } + BUILD_BUG_ON(pgd_large(*pgd) != 0); + + return p4d_offset(pgd, address); +} + +/* + * Walk the user copy of the page tables (optionally) trying to allocate + * page table pages on the way down. + * + * Returns a pointer to a PMD on success, or NULL on failure. + */ +static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) +{ + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + p4d_t *p4d = pti_user_pagetable_walk_p4d(address); + pud_t *pud; + + BUILD_BUG_ON(p4d_large(*p4d) != 0); + if (p4d_none(*p4d)) { + unsigned long new_pud_page = __get_free_page(gfp); + if (!new_pud_page) + return NULL; + + set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); + } + + pud = pud_offset(p4d, address); + /* The user page tables do not use large mappings: */ + if (pud_large(*pud)) { + WARN_ON(1); + return NULL; + } + if (pud_none(*pud)) { + unsigned long new_pmd_page = __get_free_page(gfp); + if (!new_pmd_page) + return NULL; + + set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); + } + + return pmd_offset(pud, address); +} + +#ifdef CONFIG_X86_VSYSCALL_EMULATION +/* + * Walk the shadow copy of the page tables (optionally) trying to allocate + * page table pages on the way down. Does not support large pages. + * + * Note: this is only used when mapping *new* kernel data into the + * user/shadow page tables. It is never used for userspace data. + * + * Returns a pointer to a PTE on success, or NULL on failure. + */ +static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address) +{ + gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); + pmd_t *pmd = pti_user_pagetable_walk_pmd(address); + pte_t *pte; + + /* We can't do anything sensible if we hit a large mapping. */ + if (pmd_large(*pmd)) { + WARN_ON(1); + return NULL; + } + + if (pmd_none(*pmd)) { + unsigned long new_pte_page = __get_free_page(gfp); + if (!new_pte_page) + return NULL; + + set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page))); + } + + pte = pte_offset_kernel(pmd, address); + if (pte_flags(*pte) & _PAGE_USER) { + WARN_ONCE(1, "attempt to walk to user pte\n"); + return NULL; + } + return pte; +} + +static void __init pti_setup_vsyscall(void) +{ + pte_t *pte, *target_pte; + unsigned int level; + + pte = lookup_address(VSYSCALL_ADDR, &level); + if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte)) + return; + + target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR); + if (WARN_ON(!target_pte)) + return; + + *target_pte = *pte; + set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir)); +} +#else +static void __init pti_setup_vsyscall(void) { } +#endif + +static void __init +pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear) +{ + unsigned long addr; + + /* + * Clone the populated PMDs which cover start to end. These PMD areas + * can have holes. + */ + for (addr = start; addr < end; addr += PMD_SIZE) { + pmd_t *pmd, *target_pmd; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + + pgd = pgd_offset_k(addr); + if (WARN_ON(pgd_none(*pgd))) + return; + p4d = p4d_offset(pgd, addr); + if (WARN_ON(p4d_none(*p4d))) + return; + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) + continue; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + continue; + + target_pmd = pti_user_pagetable_walk_pmd(addr); + if (WARN_ON(!target_pmd)) + return; + + /* + * Copy the PMD. That is, the kernelmode and usermode + * tables will share the last-level page tables of this + * address range + */ + *target_pmd = pmd_clear_flags(*pmd, clear); + } +} + +/* + * Clone a single p4d (i.e. a top-level entry on 4-level systems and a + * next-level entry on 5-level systems. + */ +static void __init pti_clone_p4d(unsigned long addr) +{ + p4d_t *kernel_p4d, *user_p4d; + pgd_t *kernel_pgd; + + user_p4d = pti_user_pagetable_walk_p4d(addr); + kernel_pgd = pgd_offset_k(addr); + kernel_p4d = p4d_offset(kernel_pgd, addr); + *user_p4d = *kernel_p4d; +} + +/* + * Clone the CPU_ENTRY_AREA into the user space visible page table. + */ +static void __init pti_clone_user_shared(void) +{ + pti_clone_p4d(CPU_ENTRY_AREA_BASE); +} + +/* + * Clone the ESPFIX P4D into the user space visinble page table + */ +static void __init pti_setup_espfix64(void) +{ +#ifdef CONFIG_X86_ESPFIX64 + pti_clone_p4d(ESPFIX_BASE_ADDR); +#endif +} + +/* + * Clone the populated PMDs of the entry and irqentry text and force it RO. + */ +static void __init pti_clone_entry_text(void) +{ + pti_clone_pmds((unsigned long) __entry_text_start, + (unsigned long) __irqentry_text_end, + _PAGE_RW | _PAGE_GLOBAL); +} + +/* + * Initialize kernel page table isolation + */ +void __init pti_init(void) +{ + if (!static_cpu_has(X86_FEATURE_PTI)) + return; + + pr_info("enabled\n"); + + pti_clone_user_shared(); + pti_clone_entry_text(); + pti_setup_espfix64(); + pti_setup_vsyscall(); +} diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 3118392cdf756bfc913d7a4137d5f7e0d46b046d..a1561957dccbb82d188d8209d76f7eddb780519a 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -28,6 +28,38 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ +/* + * We get here when we do something requiring a TLB invalidation + * but could not go invalidate all of the contexts. We do the + * necessary invalidation by clearing out the 'ctx_id' which + * forces a TLB flush when the context is loaded. + */ +void clear_asid_other(void) +{ + u16 asid; + + /* + * This is only expected to be set if we have disabled + * kernel _PAGE_GLOBAL pages. + */ + if (!static_cpu_has(X86_FEATURE_PTI)) { + WARN_ON_ONCE(1); + return; + } + + for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { + /* Do not need to flush the current asid */ + if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid)) + continue; + /* + * Make sure the next time we go to switch to + * this asid, we do a flush: + */ + this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0); + } + this_cpu_write(cpu_tlbstate.invalidate_other, false); +} + atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1); @@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, return; } + if (this_cpu_read(cpu_tlbstate.invalidate_other)) + clear_asid_other(); + for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) != next->context.ctx_id) @@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, *need_flush = true; } +static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush) +{ + unsigned long new_mm_cr3; + + if (need_flush) { + invalidate_user_asid(new_asid); + new_mm_cr3 = build_cr3(pgdir, new_asid); + } else { + new_mm_cr3 = build_cr3_noflush(pgdir, new_asid); + } + + /* + * Caution: many callers of this function expect + * that load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask writes. + */ + write_cr3(new_mm_cr3); +} + void leave_mm(int cpu) { struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); @@ -128,7 +182,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * isn't free. */ #ifdef CONFIG_DEBUG_VM - if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) { + if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) { /* * If we were to BUG here, we'd be very likely to kill * the system so hard that we don't see the call trace. @@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, if (need_flush) { this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); - write_cr3(build_cr3(next, new_asid)); + load_new_mm_cr3(next->pgd, new_asid, true); /* * NB: This gets called via leave_mm() in the idle path @@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } else { /* The new ASID is already up to date. */ - write_cr3(build_cr3_noflush(next, new_asid)); + load_new_mm_cr3(next->pgd, new_asid, false); /* See above wrt _rcuidle. */ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); @@ -288,7 +342,7 @@ void initialize_tlbstate_and_flush(void) !(cr4_read_shadow() & X86_CR4_PCIDE)); /* Force ASID 0 and force a TLB flush. */ - write_cr3(build_cr3(mm, 0)); + write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); @@ -551,7 +605,7 @@ static void do_kernel_range_flush(void *info) /* flush range by one by one 'invlpg' */ for (addr = f->start; addr < f->end; addr += PAGE_SIZE) - __flush_tlb_single(addr); + __flush_tlb_one(addr); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c index bb461cfd01abc78cdc45c6e69f013128e04ccdb4..526536c81ddc41d395fd971d909a3b687e46d989 100644 --- a/arch/x86/pci/broadcom_bus.c +++ b/arch/x86/pci/broadcom_bus.c @@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void) * We should get host bridge information from ACPI unless the BIOS * doesn't support it. */ - if (acpi_os_get_root_pointer()) + if (!acpi_disabled && acpi_os_get_root_pointer()) return 0; #endif diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 7a5350d08cef711a14c29cf1f8fcedb70ecc7465..563049c483a12c5fe587e463fb96bff4dde22c5a 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -594,6 +594,11 @@ char *__init pcibios_setup(char *str) } else if (!strcmp(str, "nocrs")) { pci_probe |= PCI_ROOT_NO_CRS; return NULL; +#ifdef CONFIG_PHYS_ADDR_T_64BIT + } else if (!strcmp(str, "big_root_window")) { + pci_probe |= PCI_BIG_ROOT_WINDOW; + return NULL; +#endif } else if (!strcmp(str, "earlydump")) { pci_early_dump_regs = 1; return NULL; diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 1e996df687a3bc47fb796c4fe8025bcb148a6890..f6a26e3cb4763325465df6ec19efb282e26e41fd 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -662,9 +662,23 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid); */ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) { - unsigned i; u32 base, limit, high; - struct resource *res, *conflict; + struct pci_dev *other; + struct resource *res; + unsigned i; + int r; + + if (!(pci_probe & PCI_BIG_ROOT_WINDOW)) + return; + + /* Check that we are the only device of that type */ + other = pci_get_device(dev->vendor, dev->device, NULL); + if (other != dev || + (other = pci_get_device(dev->vendor, dev->device, other))) { + /* This is a multi-socket system, don't touch it for now */ + pci_dev_put(other); + return; + } for (i = 0; i < 8; i++) { pci_read_config_dword(dev, AMD_141b_MMIO_BASE(i), &base); @@ -689,17 +703,25 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) if (!res) return; + /* + * Allocate a 256GB window directly below the 0xfd00000000 hardware + * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6). + */ res->name = "PCI Bus 0000:00"; res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_WINDOW; - res->start = 0x100000000ull; + res->start = 0xbd00000000ull; res->end = 0xfd00000000ull - 1; - /* Just grab the free area behind system memory for this */ - while ((conflict = request_resource_conflict(&iomem_resource, res))) - res->start = conflict->end + 1; + r = request_resource(&iomem_resource, res); + if (r) { + kfree(res); + return; + } - dev_info(&dev->dev, "adding root bus resource %pR\n", res); + dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n", + res); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) | AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK; @@ -714,10 +736,10 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev) pci_bus_add_resource(dev->bus, res, 0); } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); #endif diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 6a151ce70e865caadde95c859855c4b63283ad4b..2dd15e967c3f5257c530d53c7d4f51f2e3165190 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -135,7 +135,9 @@ pgd_t * __init efi_call_phys_prolog(void) pud[j] = *pud_offset(p4d_k, vaddr); } } + pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; } + out: __flush_tlb_all(); @@ -196,6 +198,9 @@ static pgd_t *efi_pgd; * because we want to avoid inserting EFI region mappings (EFI_VA_END * to EFI_VA_START) into the standard kernel page tables. Everything * else can be shared, see efi_sync_low_kernel_mappings(). + * + * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the + * allocation. */ int __init efi_alloc_page_tables(void) { @@ -208,7 +213,7 @@ int __init efi_alloc_page_tables(void) return 0; gfp_mask = GFP_KERNEL | __GFP_ZERO; - efi_pgd = (pgd_t *)__get_free_page(gfp_mask); + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) return -ENOMEM; diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 8a99a2e96537a91db6574955aafeae1a5103ce02..5b513ccffde404adbd1ea2929226669f2a282825 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -592,7 +592,18 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff, /* * Update the first page pointer to skip over the CSH header. */ - cap_info->pages[0] += csh->headersize; + cap_info->phys[0] += csh->headersize; + + /* + * cap_info->capsule should point at a virtual mapping of the entire + * capsule, starting at the capsule header. Our image has the Quark + * security header prepended, so we cannot rely on the default vmap() + * mapping created by the generic capsule code. + * Given that the Quark firmware does not appear to care about the + * virtual mapping, let's just point cap_info->capsule at our copy + * of the capsule header. + */ + cap_info->capsule = &cap_info->header; return 1; } diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c index dc036e511f48d3adc3261ca4d4214c1c6244dbe6..5a0483e7bf662cb27044b7684567b644dfe49b81 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c @@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata) return 0; } -static const struct bt_sfi_data tng_bt_sfi_data __initdata = { +static struct bt_sfi_data tng_bt_sfi_data __initdata = { .setup = tng_bt_sfi_setup, }; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index f44c0bc95aa2f45ad42462a5f23f4db4672d1257..8538a6723171a5606058a8823ed1cbb2d343fdb6 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp, local_flush_tlb(); stat->d_alltlb++; } else { - __flush_tlb_one(msg->address); + __flush_tlb_single(msg->address); stat->d_onetlb++; } stat->d_requestee++; diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 5f6fd860820a3c5175609a5a3468262aac937604..e4cb9f4cde8ae224afe51ced1970805df293c956 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq, * on the specified blade to allow the sending of MSIs to the specified CPU. */ static int uv_domain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) + struct irq_data *irq_data, bool reserve) { uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data); return 0; diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index c34bd8233f7c81ddff649e970f79802b9ae7ef27..5f64f30873e257757091b88f8e263711d8db548f 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -905,7 +905,7 @@ static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) /* * UV NMI handler */ -int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) +static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) { struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi; int cpu = smp_processor_id(); @@ -1013,7 +1013,7 @@ void uv_nmi_init(void) } /* Setup HUB NMI info */ -void __init uv_nmi_setup_common(bool hubbed) +static void __init uv_nmi_setup_common(bool hubbed) { int size = sizeof(void *) * (1 << NODES_SHIFT); int cpu; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 84fcfde53f8f3f5bb4b85efc20ab106c419dcc11..a7d966964c6f20577c927cf5e618bc86b3331977 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt) /* * descriptor tables */ -#ifdef CONFIG_X86_32 store_idt(&ctxt->idt); -#else -/* CONFIG_X86_64 */ - store_idt((struct desc_ptr *)&ctxt->idt_limit); -#endif + /* * We save it here, but restore it only in the hibernate case. * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit @@ -103,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt) /* * segment registers */ -#ifdef CONFIG_X86_32 - savesegment(es, ctxt->es); - savesegment(fs, ctxt->fs); +#ifdef CONFIG_X86_32_LAZY_GS savesegment(gs, ctxt->gs); - savesegment(ss, ctxt->ss); -#else -/* CONFIG_X86_64 */ - asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); - asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); - asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); - asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs)); - asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss)); +#endif +#ifdef CONFIG_X86_64 + savesegment(gs, ctxt->gs); + savesegment(fs, ctxt->fs); + savesegment(ds, ctxt->ds); + savesegment(es, ctxt->es); rdmsrl(MSR_FS_BASE, ctxt->fs_base); - rdmsrl(MSR_GS_BASE, ctxt->gs_base); - rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); + rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base); + rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base); mtrr_save_fixed_ranges(NULL); rdmsrl(MSR_EFER, ctxt->efer); @@ -160,17 +152,19 @@ static void do_fpu_end(void) static void fix_processor_context(void) { int cpu = smp_processor_id(); - struct tss_struct *t = &per_cpu(cpu_tss, cpu); #ifdef CONFIG_X86_64 struct desc_struct *desc = get_cpu_gdt_rw(cpu); tss_desc tss; #endif - set_tss_desc(cpu, t); /* - * This just modifies memory; should not be - * necessary. But... This is necessary, because - * 386 hardware has concept of busy TSS or some - * similar stupidity. - */ + + /* + * We need to reload TR, which requires that we change the + * GDT entry to indicate "available" first. + * + * XXX: This could probably all be replaced by a call to + * force_reload_TR(). + */ + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); #ifdef CONFIG_X86_64 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); @@ -178,6 +172,9 @@ static void fix_processor_context(void) write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS); syscall_init(); /* This sets MSR_*STAR and related */ +#else + if (boot_cpu_has(X86_FEATURE_SEP)) + enable_sep_cpu(); #endif load_TR_desc(); /* This does ltr */ load_mm_ldt(current->active_mm); /* This does lldt */ @@ -190,9 +187,12 @@ static void fix_processor_context(void) } /** - * __restore_processor_state - restore the contents of CPU registers saved - * by __save_processor_state() - * @ctxt - structure to load the registers contents from + * __restore_processor_state - restore the contents of CPU registers saved + * by __save_processor_state() + * @ctxt - structure to load the registers contents from + * + * The asm code that gets us here will have restored a usable GDT, although + * it will be pointing to the wrong alias. */ static void notrace __restore_processor_state(struct saved_context *ctxt) { @@ -215,46 +215,52 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) write_cr2(ctxt->cr2); write_cr0(ctxt->cr0); + /* Restore the IDT. */ + load_idt(&ctxt->idt); + /* - * now restore the descriptor tables to their proper values - * ltr is done i fix_processor_context(). + * Just in case the asm code got us here with the SS, DS, or ES + * out of sync with the GDT, update them. */ -#ifdef CONFIG_X86_32 - load_idt(&ctxt->idt); + loadsegment(ss, __KERNEL_DS); + loadsegment(ds, __USER_DS); + loadsegment(es, __USER_DS); + + /* + * Restore percpu access. Percpu access can happen in exception + * handlers or in complicated helpers like load_gs_index(). + */ +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base); #else -/* CONFIG_X86_64 */ - load_idt((const struct desc_ptr *)&ctxt->idt_limit); + loadsegment(fs, __KERNEL_PERCPU); + loadsegment(gs, __KERNEL_STACK_CANARY); #endif + /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */ + fix_processor_context(); + /* - * segment registers + * Now that we have descriptor tables fully restored and working + * exception handling, restore the usermode segments. */ -#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_64 + loadsegment(ds, ctxt->es); loadsegment(es, ctxt->es); loadsegment(fs, ctxt->fs); - loadsegment(gs, ctxt->gs); - loadsegment(ss, ctxt->ss); + load_gs_index(ctxt->gs); /* - * sysenter MSRs + * Restore FSBASE and GSBASE after restoring the selectors, since + * restoring the selectors clobbers the bases. Keep in mind + * that MSR_KERNEL_GS_BASE is horribly misnamed. */ - if (boot_cpu_has(X86_FEATURE_SEP)) - enable_sep_cpu(); -#else -/* CONFIG_X86_64 */ - asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); - asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); - asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs)); - load_gs_index(ctxt->gs); - asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss)); - wrmsrl(MSR_FS_BASE, ctxt->fs_base); - wrmsrl(MSR_GS_BASE, ctxt->gs_base); - wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); + wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base); +#elif defined(CONFIG_X86_32_LAZY_GS) + loadsegment(gs, ctxt->gs); #endif - fix_processor_context(); - do_fpu_end(); tsc_verify_tsc_adjust(true); x86_platform.restore_sched_clock_state(); diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 6b830d4cb4c8e8e78c44dd87a12f642234533b4e..de58533d3664cdf067aedecfc81af2729c3b539b 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -57,7 +57,7 @@ static u32 xen_apic_read(u32 reg) return 0; if (reg == APIC_LVR) - return 0x10; + return 0x14; #ifdef CONFIG_X86_32 if (reg == APIC_LDR) return SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d669e9d890017770456abe458f1161eb2509c09e..c9081c6671f0b7a05ecfaaf206e7e1ed2b1f456a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1,8 +1,12 @@ +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +#include +#endif #include #include #include #include +#include #include #include @@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num) } EXPORT_SYMBOL(xen_arch_unregister_cpu); #endif + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +void __init arch_xen_balloon_init(struct resource *hostmem_resource) +{ + struct xen_memory_map memmap; + int rc; + unsigned int i, last_guest_ram; + phys_addr_t max_addr = PFN_PHYS(max_pfn); + struct e820_table *xen_e820_table; + const struct e820_entry *entry; + struct resource *res; + + if (!xen_initial_domain()) + return; + + xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL); + if (!xen_e820_table) + return; + + memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries); + set_xen_guest_handle(memmap.buffer, xen_e820_table->entries); + rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap); + if (rc) { + pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc); + goto out; + } + + last_guest_ram = 0; + for (i = 0; i < memmap.nr_entries; i++) { + if (xen_e820_table->entries[i].addr >= max_addr) + break; + if (xen_e820_table->entries[i].type == E820_TYPE_RAM) + last_guest_ram = i; + } + + entry = &xen_e820_table->entries[last_guest_ram]; + if (max_addr >= entry->addr + entry->size) + goto out; /* No unallocated host RAM. */ + + hostmem_resource->start = max_addr; + hostmem_resource->end = entry->addr + entry->size; + + /* + * Mark non-RAM regions between the end of dom0 RAM and end of host RAM + * as unavailable. The rest of that region can be used for hotplug-based + * ballooning. + */ + for (; i < memmap.nr_entries; i++) { + entry = &xen_e820_table->entries[i]; + + if (entry->type == E820_TYPE_RAM) + continue; + + if (entry->addr >= hostmem_resource->end) + break; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + goto out; + + res->name = "Unavailable host RAM"; + res->start = entry->addr; + res->end = (entry->addr + entry->size < hostmem_resource->end) ? + entry->addr + entry->size : hostmem_resource->end; + rc = insert_resource(hostmem_resource, res); + if (rc) { + pr_warn("%s: Can't insert [%llx - %llx) (%d)\n", + __func__, res->start, res->end, rc); + kfree(res); + goto out; + } + } + + out: + kfree(xen_e820_table); +} +#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5b2b3f3f653112fbe00484f5dcae0de02543df3c..c047f42552e1a61ed0a5787d904681974cc05af1 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -88,6 +88,8 @@ #include "multicalls.h" #include "pmu.h" +#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */ + void *xen_initial_gdt; static int xen_cpu_up_prepare_pv(unsigned int cpu); @@ -622,7 +624,7 @@ static struct trap_array_entry trap_array[] = { { simd_coprocessor_error, xen_simd_coprocessor_error, false }, }; -static bool get_trap_addr(void **addr, unsigned int ist) +static bool __ref get_trap_addr(void **addr, unsigned int ist) { unsigned int nr; bool ist_okay = false; @@ -644,6 +646,14 @@ static bool get_trap_addr(void **addr, unsigned int ist) } } + if (nr == ARRAY_SIZE(trap_array) && + *addr >= (void *)early_idt_handler_array[0] && + *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) { + nr = (*addr - (void *)early_idt_handler_array[0]) / + EARLY_IDT_HANDLER_SIZE; + *addr = (void *)xen_early_idt_handler_array[nr]; + } + if (WARN_ON(ist != 0 && !ist_okay)) return false; @@ -818,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0) mcs = xen_mc_entry(0); MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); xen_mc_issue(PARAVIRT_LAZY_CPU); - this_cpu_write(cpu_tss.x86_tss.sp0, sp0); + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } void xen_set_iopl_mask(unsigned mask) @@ -1250,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void) __userpte_alloc_gfp &= ~__GFP_HIGHMEM; /* Work out if we support NX */ + get_cpu_cap(&boot_cpu_data); x86_configure_nx(); /* Get mfn list */ @@ -1262,6 +1273,21 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_setup_gdt(0); xen_init_irq_ops(); + + /* Let's presume PV guests always boot on vCPU with id 0. */ + per_cpu(xen_vcpu_id, 0) = 0; + + /* + * Setup xen_vcpu early because idt_setup_early_handler needs it for + * local_irq_disable(), irqs_disabled(). + * + * Don't do the full vcpu_info placement stuff until we have + * the cpu_possible_mask and a non-dummy shared_info. + */ + xen_vcpu_info_reset(0); + + idt_setup_early_handler(); + xen_init_capabilities(); #ifdef CONFIG_X86_LOCAL_APIC @@ -1295,18 +1321,6 @@ asmlinkage __visible void __init xen_start_kernel(void) */ acpi_numa = -1; #endif - /* Let's presume PV guests always boot on vCPU with id 0. */ - per_cpu(xen_vcpu_id, 0) = 0; - - /* - * Setup xen_vcpu early because start_kernel needs it for - * local_irq_disable(), irqs_disabled(). - * - * Don't do the full vcpu_info placement stuff until we have - * the cpu_possible_mask and a non-dummy shared_info. - */ - xen_vcpu_info_reset(0); - WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); local_irq_disable(); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index fc048ec686e7699b263254c79b482ccf935c21ef..d85076223a696d0b00bee7c22a9e28b1e66dc975 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1325,20 +1325,18 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, { struct { struct mmuext_op op; -#ifdef CONFIG_SMP - DECLARE_BITMAP(mask, num_processors); -#else DECLARE_BITMAP(mask, NR_CPUS); -#endif } *args; struct multicall_space mcs; + const size_t mc_entry_size = sizeof(args->op) + + sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus()); trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end); if (cpumask_empty(cpus)) return; /* nothing to do */ - mcs = xen_mc_entry(sizeof(*args)); + mcs = xen_mc_entry(mc_entry_size); args = mcs.args; args->op.arg2.vcpumask = to_cpumask(args->mask); @@ -1902,6 +1900,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* Graft it onto L4[511][510] */ copy_page(level2_kernel_pgt, l2); + /* + * Zap execute permission from the ident map. Due to the sharing of + * L1 entries we need to do this in the L2. + */ + if (__supported_pte_mask & _PAGE_NX) { + for (i = 0; i < PTRS_PER_PMD; ++i) { + if (pmd_none(level2_ident_pgt[i])) + continue; + level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX); + } + } + /* Copy the initial P->M table mappings if necessary. */ i = pgd_index(xen_start_info->mfn_list); if (i && i < pgd_index(__START_KERNEL_map)) @@ -2261,7 +2271,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) switch (idx) { case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: - case FIX_RO_IDT: #ifdef CONFIG_X86_32 case FIX_WP_TEST: # ifdef CONFIG_HIGHMEM @@ -2272,7 +2281,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif case FIX_TEXT_POKE0: case FIX_TEXT_POKE1: - case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END: /* All local page mappings */ pte = pfn_pte(phys, prot); break; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index c114ca767b3b8a382918e2b0160983fa257318db..6e0d2086eacbf37326467b5142e59750151a5328 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -808,7 +808,6 @@ char * __init xen_memory_setup(void) addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; while (i < xen_e820_table.nr_entries) { - bool discard = false; chunk_size = size; type = xen_e820_table.entries[i].type; @@ -824,11 +823,10 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(pfn_s, n_pfns); xen_max_p2m_pfn = pfn_s + n_pfns; } else - discard = true; + type = E820_TYPE_UNUSABLE; } - if (!discard) - xen_align_and_add_e820_region(addr, chunk_size, type); + xen_align_and_add_e820_region(addr, chunk_size, type); addr += chunk_size; size -= chunk_size; diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 8a10c9a9e2b50651b2c8dd322956298402e79e7d..417b339e5c8e1aadedd20231c9be82ac93dbe728 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -15,6 +15,7 @@ #include +#include #include .macro xen_pv_trap name @@ -54,6 +55,19 @@ xen_pv_trap entry_INT80_compat #endif xen_pv_trap hypervisor_callback + __INIT +ENTRY(xen_early_idt_handler_array) + i = 0 + .rept NUM_EXCEPTION_VECTORS + pop %rcx + pop %r11 + jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE + i = i + 1 + .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc + .endr +END(xen_early_idt_handler_array) + __FINIT + hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 /* * Xen64 iret frame: diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 75011b80660f6262206b6759e7b63a06a1d809ea..3b34745d0a52dd097d9c2d8955bd4afce1d24ee1 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -72,7 +72,7 @@ u64 xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); void xen_save_time_memory_area(void); void xen_restore_time_memory_area(void); -void __init xen_init_time_ops(void); +void __ref xen_init_time_ops(void); void __init xen_hvm_init_time_ops(void); irqreturn_t xen_debug_interrupt(int irq, void *dev_id); diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild index a5bcdfb890f1b77fa8f5f2b349a05d7f759fb661..837d4dd7678545dec75f53c21bcf5e2dea500202 100644 --- a/arch/xtensa/include/uapi/asm/Kbuild +++ b/arch/xtensa/include/uapi/asm/Kbuild @@ -2,6 +2,7 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += bitsperlong.h +generic-y += bpf_perf_event.h generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h diff --git a/block/bio.c b/block/bio.c index 8bfdea58159ba9ffd972dd95717e0eee99101e0a..9ef6cf3addb38cae822d0e5c5ef18ba9e98cd2d7 100644 --- a/block/bio.c +++ b/block/bio.c @@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) bio->bi_disk = bio_src->bi_disk; bio->bi_partno = bio_src->bi_partno; bio_set_flag(bio, BIO_CLONED); + if (bio_flagged(bio_src, BIO_THROTTLED)) + bio_set_flag(bio, BIO_THROTTLED); bio->bi_opf = bio_src->bi_opf; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter = bio_src->bi_iter; diff --git a/block/blk-core.c b/block/blk-core.c index b8881750a3acd705789050c845c942673da999a8..3ba4326a63b59632fad81e686bf8229eee07320b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -562,6 +562,13 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all) } } +void blk_drain_queue(struct request_queue *q) +{ + spin_lock_irq(q->queue_lock); + __blk_drain_queue(q, true); + spin_unlock_irq(q->queue_lock); +} + /** * blk_queue_bypass_start - enter queue bypass mode * @q: queue of interest @@ -689,8 +696,6 @@ void blk_cleanup_queue(struct request_queue *q) */ blk_freeze_queue(q); spin_lock_irq(lock); - if (!q->mq_ops) - __blk_drain_queue(q, true); queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); diff --git a/block/blk-map.c b/block/blk-map.c index b21f8e86f1207f9b76bf3e2083fcf72b5062f0b7..d3a94719f03fb2af81d6270d6fc9ed58f0dde373 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -12,22 +12,29 @@ #include "blk.h" /* - * Append a bio to a passthrough request. Only works can be merged into - * the request based on the driver constraints. + * Append a bio to a passthrough request. Only works if the bio can be merged + * into the request based on the driver constraints. */ -int blk_rq_append_bio(struct request *rq, struct bio *bio) +int blk_rq_append_bio(struct request *rq, struct bio **bio) { - blk_queue_bounce(rq->q, &bio); + struct bio *orig_bio = *bio; + + blk_queue_bounce(rq->q, bio); if (!rq->bio) { - blk_rq_bio_prep(rq->q, rq, bio); + blk_rq_bio_prep(rq->q, rq, *bio); } else { - if (!ll_back_merge_fn(rq->q, rq, bio)) + if (!ll_back_merge_fn(rq->q, rq, *bio)) { + if (orig_bio != *bio) { + bio_put(*bio); + *bio = orig_bio; + } return -EINVAL; + } - rq->biotail->bi_next = bio; - rq->biotail = bio; - rq->__data_len += bio->bi_iter.bi_size; + rq->biotail->bi_next = *bio; + rq->biotail = *bio; + rq->__data_len += (*bio)->bi_iter.bi_size; } return 0; @@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq, * We link the bounce buffer in and could have to traverse it * later so we have to get a ref to prevent it from being freed */ - ret = blk_rq_append_bio(rq, bio); - bio_get(bio); + ret = blk_rq_append_bio(rq, &bio); if (ret) { - bio_endio(bio); __blk_rq_unmap_user(orig_bio); - bio_put(bio); return ret; } + bio_get(bio); return 0; } @@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, int reading = rq_data_dir(rq) == READ; unsigned long addr = (unsigned long) kbuf; int do_copy = 0; - struct bio *bio; + struct bio *bio, *orig_bio; int ret; if (len > (queue_max_hw_sectors(q) << 9)) @@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (do_copy) rq->rq_flags |= RQF_COPY_USER; - ret = blk_rq_append_bio(rq, bio); + orig_bio = bio; + ret = blk_rq_append_bio(rq, &bio); if (unlikely(ret)) { /* request is too big */ - bio_put(bio); + bio_put(orig_bio); return ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index 11097477eeab6591088ca817d4690535e114e699..3d379732749175ece7ae39e427e115f51621c521 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -161,6 +161,8 @@ void blk_freeze_queue(struct request_queue *q) * exported to drivers as the only user for unfreeze is blk_mq. */ blk_freeze_queue_start(q); + if (!q->mq_ops) + blk_drain_queue(q); blk_mq_freeze_queue_wait(q); } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 825bc29767e6699ac85675d319a9866b70cc9b84..d19f416d61012ac032c49608f0afe463c948e8bc 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2226,13 +2226,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, out_unlock: spin_unlock_irq(q->queue_lock); out: - /* - * As multiple blk-throtls may stack in the same issue path, we - * don't want bios to leave with the flag set. Clear the flag if - * being issued. - */ - if (!throttled) - bio_clear_flag(bio, BIO_THROTTLED); + bio_set_flag(bio, BIO_THROTTLED); #ifdef CONFIG_BLK_DEV_THROTTLING_LOW if (throttled || !td->track_bio_latency) diff --git a/block/blk.h b/block/blk.h index 3f1446937aece26f38ceb66cf4e3d159a23df871..442098aa9463a37dad0dfccb1718eea65be6cdb3 100644 --- a/block/blk.h +++ b/block/blk.h @@ -330,4 +330,6 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) } #endif /* CONFIG_BOUNCE */ +extern void blk_drain_queue(struct request_queue *q); + #endif /* BLK_INTERNAL_H */ diff --git a/block/bounce.c b/block/bounce.c index fceb1a96480bfb9600e4664fa2b4992c8bb64210..1d05c422c932ad56d705f94deed6cce0891ff9d3 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, unsigned i = 0; bool bounce = false; int sectors = 0; + bool passthrough = bio_is_passthrough(*bio_orig); bio_for_each_segment(from, *bio_orig, iter) { if (i++ < BIO_MAX_PAGES) @@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, if (!bounce) return; - if (sectors < bio_sectors(*bio_orig)) { + if (!passthrough && sectors < bio_sectors(*bio_orig)) { bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split); bio_chain(bio, *bio_orig); generic_make_request(*bio_orig); *bio_orig = bio; } - bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set); + bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL : + bounce_bio_set); bio_for_each_segment_all(to, bio, i) { struct page *page = to->bv_page; diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index b4df317c291692f01138b91608dc6c80f71bb9aa..f95c60774ce8ca613417d3ccf54bee52010752ee 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -100,9 +100,13 @@ struct kyber_hctx_data { unsigned int cur_domain; unsigned int batching; wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS]; + struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS]; atomic_t wait_index[KYBER_NUM_DOMAINS]; }; +static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags, + void *key); + static int rq_sched_domain(const struct request *rq) { unsigned int op = rq->cmd_flags; @@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) for (i = 0; i < KYBER_NUM_DOMAINS; i++) { INIT_LIST_HEAD(&khd->rqs[i]); + init_waitqueue_func_entry(&khd->domain_wait[i], + kyber_domain_wake); + khd->domain_wait[i].private = hctx; INIT_LIST_HEAD(&khd->domain_wait[i].entry); atomic_set(&khd->wait_index[i], 0); } @@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd, int nr; nr = __sbitmap_queue_get(domain_tokens); - if (nr >= 0) - return nr; /* * If we failed to get a domain token, make sure the hardware queue is * run when one becomes available. Note that this is serialized on * khd->lock, but we still need to be careful about the waker. */ - if (list_empty_careful(&wait->entry)) { - init_waitqueue_func_entry(wait, kyber_domain_wake); - wait->private = hctx; + if (nr < 0 && list_empty_careful(&wait->entry)) { ws = sbq_wait_ptr(domain_tokens, &khd->wait_index[sched_domain]); + khd->domain_ws[sched_domain] = ws; add_wait_queue(&ws->wait, wait); /* * Try again in case a token was freed before we got on the wait - * queue. The waker may have already removed the entry from the - * wait queue, but list_del_init() is okay with that. + * queue. */ nr = __sbitmap_queue_get(domain_tokens); - if (nr >= 0) { - unsigned long flags; + } - spin_lock_irqsave(&ws->wait.lock, flags); - list_del_init(&wait->entry); - spin_unlock_irqrestore(&ws->wait.lock, flags); - } + /* + * If we got a token while we were on the wait queue, remove ourselves + * from the wait queue to ensure that all wake ups make forward + * progress. It's possible that the waker already deleted the entry + * between the !list_empty_careful() check and us grabbing the lock, but + * list_del_init() is okay with that. + */ + if (nr >= 0 && !list_empty_careful(&wait->entry)) { + ws = khd->domain_ws[sched_domain]; + spin_lock_irq(&ws->wait.lock); + list_del_init(&wait->entry); + spin_unlock_irq(&ws->wait.lock); } + return nr; } diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 358749c38894e31481fb0d903c0d1f7504311aa8..35d4dcea381fbee7aa312667aa235456bb9d7918 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -664,7 +664,7 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq) unsigned int i; list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) { - ctx->rcvused -= rsgl->sg_num_bytes; + atomic_sub(rsgl->sg_num_bytes, &ctx->rcvused); af_alg_free_sg(&rsgl->sgl); list_del(&rsgl->list); if (rsgl != &areq->first_rsgl) @@ -672,14 +672,15 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq) } tsgl = areq->tsgl; - for_each_sg(tsgl, sg, areq->tsgl_entries, i) { - if (!sg_page(sg)) - continue; - put_page(sg_page(sg)); - } + if (tsgl) { + for_each_sg(tsgl, sg, areq->tsgl_entries, i) { + if (!sg_page(sg)) + continue; + put_page(sg_page(sg)); + } - if (areq->tsgl && areq->tsgl_entries) sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl)); + } } EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls); @@ -1137,12 +1138,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, if (!af_alg_readable(sk)) break; - if (!ctx->used) { - err = af_alg_wait_for_data(sk, flags); - if (err) - return err; - } - seglen = min_t(size_t, (maxsize - len), msg_data_left(msg)); @@ -1168,7 +1163,7 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, areq->last_rsgl = rsgl; len += err; - ctx->rcvused += err; + atomic_add(err, &ctx->rcvused); rsgl->sg_num_bytes = err; iov_iter_advance(&msg->msg_iter, err); } diff --git a/crypto/algapi.c b/crypto/algapi.c index 60d7366ed343e9ad6a76b9dd36be22fbd7543c89..9a636f961572b99af844b1d8a28bebfd9463c9e2 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, spawn->alg = NULL; spawns = &inst->alg.cra_users; + + /* + * We may encounter an unregistered instance here, since + * an instance's spawns are set up prior to the instance + * being registered. An unregistered instance will have + * NULL ->cra_users.next, since ->cra_users isn't + * properly initialized until registration. But an + * unregistered instance cannot have any users, so treat + * it the same as ->cra_users being empty. + */ + if (spawns->next == NULL) + break; } } while ((spawns = crypto_more_spawns(alg, &stack, &top, &secondary_spawns))); diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 805f485ddf1be4711a9d2ec47998964543f1d217..e9885a35ef6e2372a393cf3f0445924f9f044788 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t usedpages = 0; /* [in] RX bufs to be used from user */ size_t processed = 0; /* [in] TX bufs to be consumed */ + if (!ctx->used) { + err = af_alg_wait_for_data(sk, flags); + if (err) + return err; + } + /* * Data length provided by caller via sendmsg/sendpage that has not * yet been processed. @@ -285,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, /* AIO operation */ sock_hold(sk); areq->iocb = msg->msg_iocb; + + /* Remember output size that will be generated. */ + areq->outlen = outlen; + aead_request_set_callback(&areq->cra_u.aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, af_alg_async_cb, areq); @@ -292,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, crypto_aead_decrypt(&areq->cra_u.aead_req); /* AIO operation in progress */ - if (err == -EINPROGRESS || err == -EBUSY) { - /* Remember output size that will be generated. */ - areq->outlen = outlen; - + if (err == -EINPROGRESS || err == -EBUSY) return -EIOCBQUEUED; - } sock_put(sk); } else { @@ -503,6 +509,7 @@ static void aead_release(void *private) struct aead_tfm *tfm = private; crypto_free_aead(tfm->aead); + crypto_put_default_null_skcipher2(); kfree(tfm); } @@ -535,7 +542,6 @@ static void aead_sock_destruct(struct sock *sk) unsigned int ivlen = crypto_aead_ivsize(tfm); af_alg_pull_tsgl(sk, ctx->used, NULL, 0); - crypto_put_default_null_skcipher2(); sock_kzfree_s(sk, ctx->iv, ivlen); sock_kfree_s(sk, ctx, ctx->len); af_alg_release_parent(sk); @@ -565,7 +571,7 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk) INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; ctx->used = 0; - ctx->rcvused = 0; + atomic_set(&ctx->rcvused, 0); ctx->more = 0; ctx->merge = 0; ctx->enc = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 30cff827dd8fff048fa3e2ca7de770ab73022749..c5c47b680152034581957dc13ac1884f0c69f546 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, int err = 0; size_t len = 0; + if (!ctx->used) { + err = af_alg_wait_for_data(sk, flags); + if (err) + return err; + } + /* Allocate cipher request for current operation. */ areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) + crypto_skcipher_reqsize(tfm)); @@ -119,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, /* AIO operation */ sock_hold(sk); areq->iocb = msg->msg_iocb; + + /* Remember output size that will be generated. */ + areq->outlen = len; + skcipher_request_set_callback(&areq->cra_u.skcipher_req, CRYPTO_TFM_REQ_MAY_SLEEP, af_alg_async_cb, areq); @@ -127,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, crypto_skcipher_decrypt(&areq->cra_u.skcipher_req); /* AIO operation in progress */ - if (err == -EINPROGRESS || err == -EBUSY) { - /* Remember output size that will be generated. */ - areq->outlen = len; - + if (err == -EINPROGRESS || err == -EBUSY) return -EIOCBQUEUED; - } sock_put(sk); } else { @@ -384,7 +390,7 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk) INIT_LIST_HEAD(&ctx->tsgl_list); ctx->len = len; ctx->used = 0; - ctx->rcvused = 0; + atomic_set(&ctx->rcvused, 0); ctx->more = 0; ctx->merge = 0; ctx->enc = 0; diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c index c1ca1e86f5c4f86d1110343aa5194e2c3147f4d5..a6dcaa659aa8c162e2593df43c52259a0f2900c4 100644 --- a/crypto/asymmetric_keys/pkcs7_parser.c +++ b/crypto/asymmetric_keys/pkcs7_parser.c @@ -148,8 +148,10 @@ struct pkcs7_message *pkcs7_parse_message(const void *data, size_t datalen) } ret = pkcs7_check_authattrs(ctx->msg); - if (ret < 0) + if (ret < 0) { + msg = ERR_PTR(ret); goto out; + } msg = ctx->msg; ctx->msg = NULL; diff --git a/crypto/asymmetric_keys/pkcs7_trust.c b/crypto/asymmetric_keys/pkcs7_trust.c index f6a009d88a33fb550654b11d2cfc4460c733a049..1f4e25f10049c2645c2a421b07bb21fdaf1c0857 100644 --- a/crypto/asymmetric_keys/pkcs7_trust.c +++ b/crypto/asymmetric_keys/pkcs7_trust.c @@ -69,7 +69,7 @@ static int pkcs7_validate_trust_one(struct pkcs7_message *pkcs7, /* Self-signed certificates form roots of their own, and if we * don't know them, then we can't accept them. */ - if (x509->next == x509) { + if (x509->signer == x509) { kleave(" = -ENOKEY [unknown self-signed]"); return -ENOKEY; } diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index 2d93d9eccb4d0c46d7b93306f50f590709d1b21f..39e6de0c2761fb03efab8f144fa1d1693e7cb035 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -59,11 +59,8 @@ static int pkcs7_digest(struct pkcs7_message *pkcs7, desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; /* Digest the message [RFC2315 9.3] */ - ret = crypto_shash_init(desc); - if (ret < 0) - goto error; - ret = crypto_shash_finup(desc, pkcs7->data, pkcs7->data_len, - sig->digest); + ret = crypto_shash_digest(desc, pkcs7->data, pkcs7->data_len, + sig->digest); if (ret < 0) goto error; pr_devel("MsgDigest = [%*ph]\n", 8, sig->digest); @@ -150,7 +147,7 @@ static int pkcs7_find_key(struct pkcs7_message *pkcs7, pr_devel("Sig %u: Found cert serial match X.509[%u]\n", sinfo->index, certix); - if (x509->pub->pkey_algo != sinfo->sig->pkey_algo) { + if (strcmp(x509->pub->pkey_algo, sinfo->sig->pkey_algo) != 0) { pr_warn("Sig %u: X.509 algo and PKCS#7 sig algo don't match\n", sinfo->index); continue; diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index bc3035ef27a22b3ca593532dedc8020bb2773a2a..de996586762a83c0b3214aaf5fa561bde92f65c0 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -73,7 +73,7 @@ int public_key_verify_signature(const struct public_key *pkey, char alg_name_buf[CRYPTO_MAX_ALG_NAME]; void *output; unsigned int outlen; - int ret = -ENOMEM; + int ret; pr_devel("==>%s()\n", __func__); @@ -99,6 +99,7 @@ int public_key_verify_signature(const struct public_key *pkey, if (IS_ERR(tfm)) return PTR_ERR(tfm); + ret = -ENOMEM; req = akcipher_request_alloc(tfm, GFP_KERNEL); if (!req) goto error_free_tfm; @@ -127,7 +128,7 @@ int public_key_verify_signature(const struct public_key *pkey, * signature and returns that to us. */ ret = crypto_wait_req(crypto_akcipher_verify(req), &cwait); - if (ret < 0) + if (ret) goto out_free_output; /* Do the actual verification step. */ @@ -142,6 +143,8 @@ int public_key_verify_signature(const struct public_key *pkey, error_free_tfm: crypto_free_akcipher(tfm); pr_devel("<==%s() = %d\n", __func__, ret); + if (WARN_ON_ONCE(ret > 0)) + ret = -EINVAL; return ret; } EXPORT_SYMBOL_GPL(public_key_verify_signature); diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index dd03fead1ca358fc4f21cdb0c5c65b9896a39b38..ce2df8c9c583970be0177d2a50de85316fb1c2b2 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -409,6 +409,8 @@ int x509_extract_key_data(void *context, size_t hdrlen, ctx->cert->pub->pkey_algo = "rsa"; /* Discard the BIT STRING metadata */ + if (vlen < 1 || *(const u8 *)value != 0) + return -EBADMSG; ctx->key = value + 1; ctx->key_size = vlen - 1; return 0; diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index c9013582c026748a10b09eeb09d5c71f0571ea7a..9338b4558cdc52b70a86c820ef2d39186bf1e3c3 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -79,11 +79,7 @@ int x509_get_sig_params(struct x509_certificate *cert) desc->tfm = tfm; desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - ret = crypto_shash_init(desc); - if (ret < 0) - goto error_2; - might_sleep(); - ret = crypto_shash_finup(desc, cert->tbs, cert->tbs_size, sig->digest); + ret = crypto_shash_digest(desc, cert->tbs, cert->tbs_size, sig->digest); if (ret < 0) goto error_2; @@ -135,7 +131,7 @@ int x509_check_for_self_signed(struct x509_certificate *cert) } ret = -EKEYREJECTED; - if (cert->pub->pkey_algo != cert->sig->pkey_algo) + if (strcmp(cert->pub->pkey_algo, cert->sig->pkey_algo) != 0) goto out; ret = public_key_verify_signature(cert->pub, cert->sig); diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index db1bc3147bc4708b5a4e16c36844e5452101bf64..600afa99941fe07470b74f3c142b59716656df92 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -610,6 +610,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, algt->mask)); if (IS_ERR(poly)) return PTR_ERR(poly); + poly_hash = __crypto_hash_alg_common(poly); + + err = -EINVAL; + if (poly_hash->digestsize != POLY1305_DIGEST_SIZE) + goto out_put_poly; err = -ENOMEM; inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); @@ -618,7 +623,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb, ctx = aead_instance_ctx(inst); ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize; - poly_hash = __crypto_hash_alg_common(poly); err = crypto_init_ahash_spawn(&ctx->poly, poly_hash, aead_crypto_instance(inst)); if (err) diff --git a/crypto/hmac.c b/crypto/hmac.c index 92871dc2a63ec66ca628df3a44b025b7ef6f247e..e74730224f0a5f6346bb8ae7b80f3ed5e6cb6281 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -195,11 +195,15 @@ static int hmac_create(struct crypto_template *tmpl, struct rtattr **tb) salg = shash_attr_alg(tb[1], 0, 0); if (IS_ERR(salg)) return PTR_ERR(salg); + alg = &salg->base; + /* The underlying hash algorithm must be unkeyed */ err = -EINVAL; + if (crypto_shash_alg_has_setkey(salg)) + goto out_put_alg; + ds = salg->digestsize; ss = salg->statesize; - alg = &salg->base; if (ds > alg->cra_blocksize || ss < alg->cra_blocksize) goto out_put_alg; diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c index 4e64726588524f137acd590809bef11673695ed2..eca04d3729b37c696c2dac4b0ac472422f30615d 100644 --- a/crypto/mcryptd.c +++ b/crypto/mcryptd.c @@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue, pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); + spin_lock_init(&cpu_queue->q_lock); } return 0; } @@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue, int cpu, err; struct mcryptd_cpu_queue *cpu_queue; - cpu = get_cpu(); - cpu_queue = this_cpu_ptr(queue->cpu_queue); - rctx->tag.cpu = cpu; + cpu_queue = raw_cpu_ptr(queue->cpu_queue); + spin_lock(&cpu_queue->q_lock); + cpu = smp_processor_id(); + rctx->tag.cpu = smp_processor_id(); err = crypto_enqueue_request(&cpu_queue->queue, request); pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", cpu, cpu_queue, request); + spin_unlock(&cpu_queue->q_lock); queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); - put_cpu(); return err; } @@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); i = 0; while (i < MCRYPTD_BATCH || single_task_running()) { - /* - * preempt_disable/enable is used to prevent - * being preempted by mcryptd_enqueue_request() - */ - local_bh_disable(); - preempt_disable(); + + spin_lock_bh(&cpu_queue->q_lock); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); - preempt_enable(); - local_bh_enable(); + spin_unlock_bh(&cpu_queue->q_lock); if (!req) { mcryptd_opportunistic_flush(); @@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work) ++i; } if (cpu_queue->queue.qlen) - queue_work(kcrypto_wq, &cpu_queue->work); + queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work); } void mcryptd_flusher(struct work_struct *__work) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index ee9cfb99fe256af06ae7ad5d946c3b76d90de1e9..f8ec3d4ba4a80f8eefed739d9e8a852865a7ac02 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -254,6 +254,14 @@ static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm) crypto_free_aead(ctx->child); } +static void pcrypt_free(struct aead_instance *inst) +{ + struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst); + + crypto_drop_aead(&ctx->spawn); + kfree(inst); +} + static int pcrypt_init_instance(struct crypto_instance *inst, struct crypto_alg *alg) { @@ -319,6 +327,8 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, inst->alg.encrypt = pcrypt_aead_encrypt; inst->alg.decrypt = pcrypt_aead_decrypt; + inst->free = pcrypt_free; + err = aead_register_instance(tmpl, inst); if (err) goto out_drop_aead; @@ -349,14 +359,6 @@ static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb) return -EINVAL; } -static void pcrypt_free(struct crypto_instance *inst) -{ - struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst); - - crypto_drop_aead(&ctx->spawn); - kfree(inst); -} - static int pcrypt_cpumask_change_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -469,7 +471,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt) static struct crypto_template pcrypt_tmpl = { .name = "pcrypt", .create = pcrypt_create, - .free = pcrypt_free, .module = THIS_MODULE, }; diff --git a/crypto/rsa_helper.c b/crypto/rsa_helper.c index 0b66dc8246068aa084dd0b44210b04dee5f2bccb..cad395d70d78e18527866bf1a3f6452c338e4c7c 100644 --- a/crypto/rsa_helper.c +++ b/crypto/rsa_helper.c @@ -30,7 +30,7 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag, return -EINVAL; if (fips_enabled) { - while (!*ptr && n_sz) { + while (n_sz && !*ptr) { ptr++; n_sz--; } diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c index f550b5d9463074b16670129341de59e069f8509c..d7da0eea5622af96f63300ad45c35450951551e1 100644 --- a/crypto/salsa20_generic.c +++ b/crypto/salsa20_generic.c @@ -188,13 +188,6 @@ static int encrypt(struct blkcipher_desc *desc, salsa20_ivsetup(ctx, walk.iv); - if (likely(walk.nbytes == nbytes)) - { - salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, - walk.src.virt.addr, nbytes); - return blkcipher_walk_done(desc, &walk, 0); - } - while (walk.nbytes >= 64) { salsa20_encrypt_bytes(ctx, walk.dst.virt.addr, walk.src.virt.addr, diff --git a/crypto/shash.c b/crypto/shash.c index 325a14da58278f01b8c1ffd92bdd8990db2860c4..e849d3ee2e2728d346df1f21f6a8d4db57fc42c5 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -25,11 +25,12 @@ static const struct crypto_type crypto_shash_type; -static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) +int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) { return -ENOSYS; } +EXPORT_SYMBOL_GPL(shash_no_setkey); static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 778e0ff42bfa801eda5be848da9e6747ebbc2626..11af5fd6a443570550e1dac5b0a429b2cae801b1 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, walk->total = req->cryptlen; walk->nbytes = 0; + walk->iv = req->iv; + walk->oiv = req->iv; if (unlikely(!walk->total)) return 0; @@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk, scatterwalk_start(&walk->in, req->src); scatterwalk_start(&walk->out, req->dst); - walk->iv = req->iv; - walk->oiv = req->iv; - walk->flags &= ~SKCIPHER_WALK_SLEEP; walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? SKCIPHER_WALK_SLEEP : 0; @@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, int err; walk->nbytes = 0; + walk->iv = req->iv; + walk->oiv = req->iv; if (unlikely(!walk->total)) return 0; @@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, scatterwalk_done(&walk->in, 0, walk->total); scatterwalk_done(&walk->out, 0, walk->total); - walk->iv = req->iv; - walk->oiv = req->iv; - if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) walk->flags |= SKCIPHER_WALK_SLEEP; else diff --git a/drivers/Makefile b/drivers/Makefile index 1d034b6804310a1d740b4e8aa40c649922748d77..e06f7f633f73ff285c64e19b89041d17e2be23ce 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -105,6 +105,7 @@ obj-$(CONFIG_TC) += tc/ obj-$(CONFIG_UWB) += uwb/ obj-$(CONFIG_USB_PHY) += usb/ obj-$(CONFIG_USB) += usb/ +obj-$(CONFIG_USB_SUPPORT) += usb/ obj-$(CONFIG_PCI) += usb/ obj-$(CONFIG_USB_GADGET) += usb/ obj-$(CONFIG_OF) += usb/ diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 6742f6c68034c5e833505d294902dd97c274c1b0..9bff853e85f37831d8d053a2aa363f139537c9b5 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -1007,7 +1007,7 @@ static ssize_t erst_reader(struct pstore_record *record) /* The record may be cleared by others, try read next record */ if (len == -ENOENT) goto skip; - else if (len < sizeof(*rcd)) { + else if (len < 0 || len < sizeof(*rcd)) { rc = -EIO; goto out; } diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 21c28433c590a4aec323bdcea19a0e1426c751e4..06ea4749ebd9826a3d7b8b0a9798a1cc797f4d61 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -949,7 +949,7 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) } *val = 0; - if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) + if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; @@ -988,7 +988,7 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; - if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) + if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) vaddr = reg_res->sys_mem_vaddr; @@ -1035,14 +1035,15 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) *lowest_non_linear_reg, *nominal_reg; u64 high, low, nom, min_nonlinear; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); - struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id]; + struct cppc_pcc_data *pcc_ss_data; int ret = 0, regs_in_pcc = 0; - if (!cpc_desc) { + if (!cpc_desc || pcc_ss_id < 0) { pr_debug("No CPC descriptor for CPU:%d\n", cpunum); return -ENODEV; } + pcc_ss_data = pcc_data[pcc_ss_id]; highest_reg = &cpc_desc->cpc_regs[HIGHEST_PERF]; lowest_reg = &cpc_desc->cpc_regs[LOWEST_PERF]; lowest_non_linear_reg = &cpc_desc->cpc_regs[LOW_NON_LINEAR_PERF]; @@ -1095,15 +1096,16 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) struct cpc_register_resource *delivered_reg, *reference_reg, *ref_perf_reg, *ctr_wrap_reg; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); - struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id]; + struct cppc_pcc_data *pcc_ss_data; u64 delivered, reference, ref_perf, ctr_wrap_time; int ret = 0, regs_in_pcc = 0; - if (!cpc_desc) { + if (!cpc_desc || pcc_ss_id < 0) { pr_debug("No CPC descriptor for CPU:%d\n", cpunum); return -ENODEV; } + pcc_ss_data = pcc_data[pcc_ss_id]; delivered_reg = &cpc_desc->cpc_regs[DELIVERED_CTR]; reference_reg = &cpc_desc->cpc_regs[REFERENCE_CTR]; ref_perf_reg = &cpc_desc->cpc_regs[REFERENCE_PERF]; @@ -1169,14 +1171,15 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); struct cpc_register_resource *desired_reg; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); - struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id]; + struct cppc_pcc_data *pcc_ss_data; int ret = 0; - if (!cpc_desc) { + if (!cpc_desc || pcc_ss_id < 0) { pr_debug("No CPC descriptor for CPU:%d\n", cpu); return -ENODEV; } + pcc_ss_data = pcc_data[pcc_ss_id]; desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; /* @@ -1301,7 +1304,7 @@ unsigned int cppc_get_transition_latency(int cpu_num) struct cpc_desc *cpc_desc; struct cpc_register_resource *desired_reg; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu_num); - struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id]; + struct cppc_pcc_data *pcc_ss_data; cpc_desc = per_cpu(cpc_desc_ptr, cpu_num); if (!cpc_desc) @@ -1311,6 +1314,10 @@ unsigned int cppc_get_transition_latency(int cpu_num) if (!CPC_IN_PCC(desired_reg)) return CPUFREQ_ETERNAL; + if (pcc_ss_id < 0) + return CPUFREQ_ETERNAL; + + pcc_ss_data = pcc_data[pcc_ss_id]; if (pcc_ss_data->pcc_mpar) latency_ns = 60 * (1000 * 1000 * 1000 / pcc_ss_data->pcc_mpar); diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index e4ffaeec9ec204110f7e50c5cff49172a486f081..a4c8ad98560dc4a3a4e21bd1bbb825bd73d237b7 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1138,7 +1138,7 @@ int acpi_subsys_thaw_noirq(struct device *dev) * skip all of the subsequent "thaw" callbacks for the device. */ if (dev_pm_smart_suspend_and_suspended(dev)) { - dev->power.direct_complete = true; + dev_pm_skip_next_resume_phases(dev); return 0; } diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index ff2580e7611d18c6d56c58d50c2cbc3a2d54aa36..abeb4df4f22e43d7f0d1398af9962135a37af4b6 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, dev_name(&adev_dimm->dev)); return -ENXIO; } + /* + * Record nfit_mem for the notification path to track back to + * the nfit sysfs attributes for this dimm device object. + */ + dev_set_drvdata(&adev_dimm->dev, nfit_mem); /* * Until standardization materializes we need to consider 4 @@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data) sysfs_put(nfit_mem->flags_attr); nfit_mem->flags_attr = NULL; } - if (adev_dimm) + if (adev_dimm) { acpi_remove_notify_handler(adev_dimm->handle, ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify); + dev_set_drvdata(&adev_dimm->dev, NULL); + } } mutex_unlock(&acpi_desc->init_mutex); } diff --git a/drivers/android/binder.c b/drivers/android/binder.c index a73596a4f804c817bfd277d3ff51eb42ca969bae..a7ecfde66b7b34f44cc9cffeac0be1b703e29aff 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -482,7 +482,8 @@ enum binder_deferred_state { * @tsk task_struct for group_leader of process * (invariant after initialized) * @files files_struct for process - * (invariant after initialized) + * (protected by @files_lock) + * @files_lock mutex to protect @files * @deferred_work_node: element for binder_deferred_list * (protected by binder_deferred_lock) * @deferred_work: bitmap of deferred work to perform @@ -530,6 +531,7 @@ struct binder_proc { int pid; struct task_struct *tsk; struct files_struct *files; + struct mutex files_lock; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -877,20 +879,26 @@ static void binder_inc_node_tmpref_ilocked(struct binder_node *node); static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) { - struct files_struct *files = proc->files; unsigned long rlim_cur; unsigned long irqs; + int ret; - if (files == NULL) - return -ESRCH; - - if (!lock_task_sighand(proc->tsk, &irqs)) - return -EMFILE; - + mutex_lock(&proc->files_lock); + if (proc->files == NULL) { + ret = -ESRCH; + goto err; + } + if (!lock_task_sighand(proc->tsk, &irqs)) { + ret = -EMFILE; + goto err; + } rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE); unlock_task_sighand(proc->tsk, &irqs); - return __alloc_fd(files, 0, rlim_cur, flags); + ret = __alloc_fd(proc->files, 0, rlim_cur, flags); +err: + mutex_unlock(&proc->files_lock); + return ret; } /* @@ -899,8 +907,10 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags) static void task_fd_install( struct binder_proc *proc, unsigned int fd, struct file *file) { + mutex_lock(&proc->files_lock); if (proc->files) __fd_install(proc->files, fd, file); + mutex_unlock(&proc->files_lock); } /* @@ -910,9 +920,11 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) { int retval; - if (proc->files == NULL) - return -ESRCH; - + mutex_lock(&proc->files_lock); + if (proc->files == NULL) { + retval = -ESRCH; + goto err; + } retval = __close_fd(proc->files, fd); /* can't restart close syscall because file table entry was cleared */ if (unlikely(retval == -ERESTARTSYS || @@ -920,7 +932,8 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) retval == -ERESTARTNOHAND || retval == -ERESTART_RESTARTBLOCK)) retval = -EINTR; - +err: + mutex_unlock(&proc->files_lock); return retval; } @@ -1947,6 +1960,26 @@ static void binder_send_failed_reply(struct binder_transaction *t, } } +/** + * binder_cleanup_transaction() - cleans up undelivered transaction + * @t: transaction that needs to be cleaned up + * @reason: reason the transaction wasn't delivered + * @error_code: error to return to caller (if synchronous call) + */ +static void binder_cleanup_transaction(struct binder_transaction *t, + const char *reason, + uint32_t error_code) +{ + if (t->buffer->target_node && !(t->flags & TF_ONE_WAY)) { + binder_send_failed_reply(t, error_code); + } else { + binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, + "undelivered transaction %d, %s\n", + t->debug_id, reason); + binder_free_transaction(t); + } +} + /** * binder_validate_object() - checks for a valid metadata object in a buffer. * @buffer: binder_buffer that we're parsing. @@ -4015,12 +4048,20 @@ static int binder_thread_read(struct binder_proc *proc, if (put_user(cmd, (uint32_t __user *)ptr)) { if (t_from) binder_thread_dec_tmpref(t_from); + + binder_cleanup_transaction(t, "put_user failed", + BR_FAILED_REPLY); + return -EFAULT; } ptr += sizeof(uint32_t); if (copy_to_user(ptr, &tr, sizeof(tr))) { if (t_from) binder_thread_dec_tmpref(t_from); + + binder_cleanup_transaction(t, "copy_to_user failed", + BR_FAILED_REPLY); + return -EFAULT; } ptr += sizeof(tr); @@ -4090,15 +4131,9 @@ static void binder_release_work(struct binder_proc *proc, struct binder_transaction *t; t = container_of(w, struct binder_transaction, work); - if (t->buffer->target_node && - !(t->flags & TF_ONE_WAY)) { - binder_send_failed_reply(t, BR_DEAD_REPLY); - } else { - binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, - "undelivered transaction %d\n", - t->debug_id); - binder_free_transaction(t); - } + + binder_cleanup_transaction(t, "process died.", + BR_DEAD_REPLY); } break; case BINDER_WORK_RETURN_ERROR: { struct binder_error *e = container_of( @@ -4605,7 +4640,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) ret = binder_alloc_mmap_handler(&proc->alloc, vma); if (ret) return ret; + mutex_lock(&proc->files_lock); proc->files = get_files_struct(current); + mutex_unlock(&proc->files_lock); return 0; err_bad_arg: @@ -4629,6 +4666,7 @@ static int binder_open(struct inode *nodp, struct file *filp) spin_lock_init(&proc->outer_lock); get_task_struct(current->group_leader); proc->tsk = current->group_leader; + mutex_init(&proc->files_lock); INIT_LIST_HEAD(&proc->todo); proc->default_priority = task_nice(current); binder_dev = container_of(filp->private_data, struct binder_device, @@ -4881,9 +4919,11 @@ static void binder_deferred_func(struct work_struct *work) files = NULL; if (defer & BINDER_DEFERRED_PUT_FILES) { + mutex_lock(&proc->files_lock); files = proc->files; if (files) proc->files = NULL; + mutex_unlock(&proc->files_lock); } if (defer & BINDER_DEFERRED_FLUSH) diff --git a/drivers/ata/ahci_mtk.c b/drivers/ata/ahci_mtk.c index 80854f71559a319fc8fb0ca4b0dd15eb7aec6460..0ae6971c2a4cfcd3bba93856335f4a031137385c 100644 --- a/drivers/ata/ahci_mtk.c +++ b/drivers/ata/ahci_mtk.c @@ -1,5 +1,5 @@ /* - * MeidaTek AHCI SATA driver + * MediaTek AHCI SATA driver * * Copyright (c) 2017 MediaTek Inc. * Author: Ryder Lee @@ -25,7 +25,7 @@ #include #include "ahci.h" -#define DRV_NAME "ahci" +#define DRV_NAME "ahci-mtk" #define SYS_CFG 0x14 #define SYS_CFG_SATA_MSK GENMASK(31, 30) @@ -192,5 +192,5 @@ static struct platform_driver mtk_ahci_driver = { }; module_platform_driver(mtk_ahci_driver); -MODULE_DESCRIPTION("MeidaTek SATA AHCI Driver"); +MODULE_DESCRIPTION("MediaTek SATA AHCI Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c index b6b0bf76dfc7bb7fe90f45418aab974bf73b6f87..2685f28160f70764ee4013930239566031c9b058 100644 --- a/drivers/ata/ahci_qoriq.c +++ b/drivers/ata/ahci_qoriq.c @@ -35,6 +35,8 @@ /* port register default value */ #define AHCI_PORT_PHY_1_CFG 0xa003fffe +#define AHCI_PORT_PHY2_CFG 0x28184d1f +#define AHCI_PORT_PHY3_CFG 0x0e081509 #define AHCI_PORT_TRANS_CFG 0x08000029 #define AHCI_PORT_AXICC_CFG 0x3fffffff @@ -183,6 +185,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2, qpriv->ecc_addr); writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); + writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2); + writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); @@ -190,6 +194,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) case AHCI_LS2080A: writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); + writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2); + writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); @@ -201,6 +207,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2, qpriv->ecc_addr); writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); + writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2); + writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); @@ -212,6 +220,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) writel(readl(qpriv->ecc_addr) | ECC_DIS_LS1088A, qpriv->ecc_addr); writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); + writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2); + writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); @@ -219,6 +229,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) case AHCI_LS2088A: writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); + writel(AHCI_PORT_PHY2_CFG, reg_base + PORT_PHY2); + writel(AHCI_PORT_PHY3_CFG, reg_base + PORT_PHY3); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 2a882929de4aa3cfcce6b46de459ee7f7360a075..8193b38a1cae7a8d738fb4c29828654dd02f0572 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3082,13 +3082,19 @@ int sata_down_spd_limit(struct ata_link *link, u32 spd_limit) bit = fls(mask) - 1; mask &= ~(1 << bit); - /* Mask off all speeds higher than or equal to the current - * one. Force 1.5Gbps if current SPD is not available. + /* + * Mask off all speeds higher than or equal to the current one. At + * this point, if current SPD is not available and we previously + * recorded the link speed from SStatus, the driver has already + * masked off the highest bit so mask should already be 1 or 0. + * Otherwise, we should not force 1.5Gbps on a link where we have + * not previously recorded speed from SStatus. Just return in this + * case. */ if (spd > 1) mask &= (1 << (spd - 1)) - 1; else - mask &= 1; + return -EINVAL; /* were we already at the bottom? */ if (!mask) diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c index ffd8d33c6e0f044dece4e4bcc4560aebd68290ac..6db2e34bd52f2e5521e72ac2b17e1ee1fe43cc44 100644 --- a/drivers/ata/pata_pdc2027x.c +++ b/drivers/ata/pata_pdc2027x.c @@ -82,7 +82,7 @@ static int pdc2027x_set_mode(struct ata_link *link, struct ata_device **r_failed * is issued to the device. However, if the controller clock is 133MHz, * the following tables must be used. */ -static struct pdc2027x_pio_timing { +static const struct pdc2027x_pio_timing { u8 value0, value1, value2; } pdc2027x_pio_timing_tbl[] = { { 0xfb, 0x2b, 0xac }, /* PIO mode 0 */ @@ -92,7 +92,7 @@ static struct pdc2027x_pio_timing { { 0x23, 0x09, 0x25 }, /* PIO mode 4, IORDY on, Prefetch off */ }; -static struct pdc2027x_mdma_timing { +static const struct pdc2027x_mdma_timing { u8 value0, value1; } pdc2027x_mdma_timing_tbl[] = { { 0xdf, 0x5f }, /* MDMA mode 0 */ @@ -100,7 +100,7 @@ static struct pdc2027x_mdma_timing { { 0x69, 0x25 }, /* MDMA mode 2 */ }; -static struct pdc2027x_udma_timing { +static const struct pdc2027x_udma_timing { u8 value0, value1, value2; } pdc2027x_udma_timing_tbl[] = { { 0x4a, 0x0f, 0xd5 }, /* UDMA mode 0 */ @@ -649,7 +649,7 @@ static long pdc_detect_pll_input_clock(struct ata_host *host) * @host: target ATA host * @board_idx: board identifier */ -static int pdc_hardware_init(struct ata_host *host, unsigned int board_idx) +static void pdc_hardware_init(struct ata_host *host, unsigned int board_idx) { long pll_clock; @@ -665,8 +665,6 @@ static int pdc_hardware_init(struct ata_host *host, unsigned int board_idx) /* Adjust PLL control register */ pdc_adjust_pll(host, pll_clock, board_idx); - - return 0; } /** @@ -753,8 +751,7 @@ static int pdc2027x_init_one(struct pci_dev *pdev, //pci_enable_intx(pdev); /* initialize adapter */ - if (pdc_hardware_init(host, board_idx) != 0) - return -EIO; + pdc_hardware_init(host, board_idx); pci_set_master(pdev); return ata_host_activate(host, pdev->irq, ata_bmdma_interrupt, @@ -778,8 +775,7 @@ static int pdc2027x_reinit_one(struct pci_dev *pdev) else board_idx = PDC_UDMA_133; - if (pdc_hardware_init(host, board_idx)) - return -EIO; + pdc_hardware_init(host, board_idx); ata_host_resume(host); return 0; diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 2c3cab066871680b9a6a5e778e8deb20b57f99b9..49fd50fccd48bef76feb90bce09f6f78fe3e0e5f 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -91,22 +91,23 @@ config FIRMWARE_IN_KERNEL depends on FW_LOADER default y help - The kernel source tree includes a number of firmware 'blobs' - that are used by various drivers. The recommended way to - use these is to run "make firmware_install", which, after - converting ihex files to binary, copies all of the needed - binary files in firmware/ to /lib/firmware/ on your system so - that they can be loaded by userspace helpers on request. + Various drivers in the kernel source tree may require firmware, + which is generally available in your distribution's linux-firmware + package. + + The linux-firmware package should install firmware into + /lib/firmware/ on your system, so they can be loaded by userspace + helpers on request. Enabling this option will build each required firmware blob - into the kernel directly, where request_firmware() will find - them without having to call out to userspace. This may be - useful if your root file system requires a device that uses - such firmware and do not wish to use an initrd. + specified by EXTRA_FIRMWARE into the kernel directly, where + request_firmware() will find them without having to call out to + userspace. This may be useful if your root file system requires a + device that uses such firmware and you do not wish to use an + initrd. This single option controls the inclusion of firmware for - every driver that uses request_firmware() and ships its - firmware in the kernel source tree, which avoids a + every driver that uses request_firmware(), which avoids a proliferation of 'Include firmware for xxx device' options. Say 'N' and let firmware be loaded from userspace. @@ -235,6 +236,9 @@ config GENERIC_CPU_DEVICES config GENERIC_CPU_AUTOPROBE bool +config GENERIC_CPU_VULNERABILITIES + bool + config SOC_BUS bool select GLOB diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index eb3af2739537a8def39259206e2345a2adc72c71..07532d83be0bca7d06aa5e99670ac920435e0ed9 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -186,6 +186,11 @@ static void cache_associativity(struct cacheinfo *this_leaf) this_leaf->ways_of_associativity = (size / nr_sets) / line_size; } +static bool cache_node_is_unified(struct cacheinfo *this_leaf) +{ + return of_property_read_bool(this_leaf->of_node, "cache-unified"); +} + static void cache_of_override_properties(unsigned int cpu) { int index; @@ -194,6 +199,14 @@ static void cache_of_override_properties(unsigned int cpu) for (index = 0; index < cache_leaves(cpu); index++) { this_leaf = this_cpu_ci->info_list + index; + /* + * init_cache_level must setup the cache level correctly + * overriding the architecturally specified levels, so + * if type is NONE at this stage, it should be unified + */ + if (this_leaf->type == CACHE_TYPE_NOCACHE && + cache_node_is_unified(this_leaf)) + this_leaf->type = CACHE_TYPE_UNIFIED; cache_size(this_leaf); cache_get_line_size(this_leaf); cache_nr_sets(this_leaf); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 58a9b608d8216a67f13d6c514d92290dc0a27de3..d99038487a0d2fab5164f8df3f1e0f7e58ea8ea3 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -511,10 +511,58 @@ static void __init cpu_dev_register_generic(void) #endif } +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES + +ssize_t __weak cpu_show_meltdown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); +static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); +static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); + +static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, + &dev_attr_spectre_v1.attr, + &dev_attr_spectre_v2.attr, + NULL +}; + +static const struct attribute_group cpu_root_vulnerabilities_group = { + .name = "vulnerabilities", + .attrs = cpu_root_vulnerabilities_attrs, +}; + +static void __init cpu_register_vulnerabilities(void) +{ + if (sysfs_create_group(&cpu_subsys.dev_root->kobj, + &cpu_root_vulnerabilities_group)) + pr_err("Unable to register CPU vulnerabilities\n"); +} + +#else +static inline void cpu_register_vulnerabilities(void) { } +#endif + void __init cpu_dev_init(void) { if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups)) panic("Failed to register CPU subsystem"); cpu_dev_register_generic(); + cpu_register_vulnerabilities(); } diff --git a/drivers/base/isa.c b/drivers/base/isa.c index cd6ccdcf9df0c5ef2a37fabb2b4b7bbfa3c88755..372d10af26009dfae317affd625c756a2700fef1 100644 --- a/drivers/base/isa.c +++ b/drivers/base/isa.c @@ -39,7 +39,7 @@ static int isa_bus_probe(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->probe) + if (isa_driver && isa_driver->probe) return isa_driver->probe(dev, to_isa_dev(dev)->id); return 0; @@ -49,7 +49,7 @@ static int isa_bus_remove(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->remove) + if (isa_driver && isa_driver->remove) return isa_driver->remove(dev, to_isa_dev(dev)->id); return 0; @@ -59,7 +59,7 @@ static void isa_bus_shutdown(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->shutdown) + if (isa_driver && isa_driver->shutdown) isa_driver->shutdown(dev, to_isa_dev(dev)->id); } @@ -67,7 +67,7 @@ static int isa_bus_suspend(struct device *dev, pm_message_t state) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->suspend) + if (isa_driver && isa_driver->suspend) return isa_driver->suspend(dev, to_isa_dev(dev)->id, state); return 0; @@ -77,7 +77,7 @@ static int isa_bus_resume(struct device *dev) { struct isa_driver *isa_driver = dev->platform_data; - if (isa_driver->resume) + if (isa_driver && isa_driver->resume) return isa_driver->resume(dev, to_isa_dev(dev)->id); return 0; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index db2f044159274a35457f4f1e5b3a55d226f08cd2..08744b572af6a25184d274ba91304e19ec2be732 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -525,6 +525,21 @@ static void dpm_watchdog_clear(struct dpm_watchdog *wd) /*------------------------- Resume routines -------------------------*/ +/** + * dev_pm_skip_next_resume_phases - Skip next system resume phases for device. + * @dev: Target device. + * + * Make the core skip the "early resume" and "resume" phases for @dev. + * + * This function can be called by middle-layer code during the "noirq" phase of + * system resume if necessary, but not by device drivers. + */ +void dev_pm_skip_next_resume_phases(struct device *dev) +{ + dev->power.is_late_suspended = false; + dev->power.is_suspended = false; +} + /** * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 027d159ac3810e9b5b190345c1bee60dbbc13790..6e89b51ea3d92b40eba2059bfdb47703916454d9 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -276,7 +276,8 @@ static int rpm_get_suppliers(struct device *dev) continue; retval = pm_runtime_get_sync(link->supplier); - if (retval < 0) { + /* Ignore suppliers with disabled runtime PM. */ + if (retval < 0 && retval != -EACCES) { pm_runtime_put_noidle(link->supplier); return retval; } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index bc8e61506968a0c3da43f2e4396f0cc9550b0d51..d5fe720cf14940b668f8764de2bad6cf95549528 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1581,9 +1581,8 @@ static int lo_open(struct block_device *bdev, fmode_t mode) return err; } -static void lo_release(struct gendisk *disk, fmode_t mode) +static void __lo_release(struct loop_device *lo) { - struct loop_device *lo = disk->private_data; int err; if (atomic_dec_return(&lo->lo_refcnt)) @@ -1610,6 +1609,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode) mutex_unlock(&lo->lo_ctl_mutex); } +static void lo_release(struct gendisk *disk, fmode_t mode) +{ + mutex_lock(&loop_index_mutex); + __lo_release(disk->private_data); + mutex_unlock(&loop_index_mutex); +} + static const struct block_device_operations lo_fops = { .owner = THIS_MODULE, .open = lo_open, diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index ccb9975a97fa3f214d658776450ab618bae26643..ad0477ae820f040affe54f4368d3a02d9da63350 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps) struct nullb_cmd { struct list_head list; struct llist_node ll_list; - call_single_data_t csd; + struct __call_single_data csd; struct request *rq; struct bio *bio; unsigned int tag; + blk_status_t error; struct nullb_queue *nq; struct hrtimer timer; - blk_status_t error; }; struct nullb_queue { diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 38fc5f397fdede04ebaf4f3f9c2e89118ee6b235..cc93522a6d419dc77902bab085306c2b316f6c16 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3047,13 +3047,21 @@ static void format_lock_cookie(struct rbd_device *rbd_dev, char *buf) mutex_unlock(&rbd_dev->watch_mutex); } +static void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie) +{ + struct rbd_client_id cid = rbd_get_cid(rbd_dev); + + strcpy(rbd_dev->lock_cookie, cookie); + rbd_set_owner_cid(rbd_dev, &cid); + queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work); +} + /* * lock_rwsem must be held for write */ static int rbd_lock(struct rbd_device *rbd_dev) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; - struct rbd_client_id cid = rbd_get_cid(rbd_dev); char cookie[32]; int ret; @@ -3068,9 +3076,7 @@ static int rbd_lock(struct rbd_device *rbd_dev) return ret; rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED; - strcpy(rbd_dev->lock_cookie, cookie); - rbd_set_owner_cid(rbd_dev, &cid); - queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work); + __rbd_lock(rbd_dev, cookie); return 0; } @@ -3856,7 +3862,7 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev) queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); } else { - strcpy(rbd_dev->lock_cookie, cookie); + __rbd_lock(rbd_dev, cookie); } } @@ -4381,7 +4387,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) segment_size = rbd_obj_bytes(&rbd_dev->header); blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); q->limits.max_sectors = queue_max_hw_sectors(q); - blk_queue_max_segments(q, segment_size / SECTOR_SIZE); + blk_queue_max_segments(q, USHRT_MAX); blk_queue_max_segment_size(q, segment_size); blk_queue_io_min(q, segment_size); blk_queue_io_opt(q, segment_size); diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 3c29d36702a8eceb4eabd5ed36cf3188adeb1694..5426c04fe24bc88faa88a2cff0624ccbce277ebe 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -1755,14 +1755,17 @@ static int cci_pmu_probe(struct platform_device *pdev) raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock); mutex_init(&cci_pmu->reserve_mutex); atomic_set(&cci_pmu->active_events, 0); - cpumask_set_cpu(smp_processor_id(), &cci_pmu->cpus); + cpumask_set_cpu(get_cpu(), &cci_pmu->cpus); ret = cci_pmu_init(cci_pmu, pdev); - if (ret) + if (ret) { + put_cpu(); return ret; + } cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE, &cci_pmu->node); + put_cpu(); pr_info("ARM %s PMU driver probed", cci_pmu->model->name); return 0; } diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c index 3063f53123979d740c1f9c97099bcdb86103d43d..b52332e52ca5e9c9a9d4b4ede3667c3ec8066474 100644 --- a/drivers/bus/arm-ccn.c +++ b/drivers/bus/arm-ccn.c @@ -262,7 +262,7 @@ static struct attribute *arm_ccn_pmu_format_attrs[] = { NULL }; -static struct attribute_group arm_ccn_pmu_format_attr_group = { +static const struct attribute_group arm_ccn_pmu_format_attr_group = { .name = "format", .attrs = arm_ccn_pmu_format_attrs, }; @@ -451,7 +451,7 @@ static struct arm_ccn_pmu_event arm_ccn_pmu_events[] = { static struct attribute *arm_ccn_pmu_events_attrs[ARRAY_SIZE(arm_ccn_pmu_events) + 1]; -static struct attribute_group arm_ccn_pmu_events_attr_group = { +static const struct attribute_group arm_ccn_pmu_events_attr_group = { .name = "events", .is_visible = arm_ccn_pmu_events_is_visible, .attrs = arm_ccn_pmu_events_attrs, @@ -548,7 +548,7 @@ static struct attribute *arm_ccn_pmu_cmp_mask_attrs[] = { NULL }; -static struct attribute_group arm_ccn_pmu_cmp_mask_attr_group = { +static const struct attribute_group arm_ccn_pmu_cmp_mask_attr_group = { .name = "cmp_mask", .attrs = arm_ccn_pmu_cmp_mask_attrs, }; @@ -569,7 +569,7 @@ static struct attribute *arm_ccn_pmu_cpumask_attrs[] = { NULL, }; -static struct attribute_group arm_ccn_pmu_cpumask_attr_group = { +static const struct attribute_group arm_ccn_pmu_cpumask_attr_group = { .attrs = arm_ccn_pmu_cpumask_attrs, }; @@ -1268,10 +1268,12 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) if (ccn->dt.id == 0) { name = "ccn"; } else { - int len = snprintf(NULL, 0, "ccn_%d", ccn->dt.id); - - name = devm_kzalloc(ccn->dev, len + 1, GFP_KERNEL); - snprintf(name, len + 1, "ccn_%d", ccn->dt.id); + name = devm_kasprintf(ccn->dev, GFP_KERNEL, "ccn_%d", + ccn->dt.id); + if (!name) { + err = -ENOMEM; + goto error_choose_name; + } } /* Perf driver registration */ @@ -1298,7 +1300,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) } /* Pick one CPU which we will use to collect data from CCN... */ - cpumask_set_cpu(smp_processor_id(), &ccn->dt.cpu); + cpumask_set_cpu(get_cpu(), &ccn->dt.cpu); /* Also make sure that the overflow interrupt is handled by this CPU */ if (ccn->irq) { @@ -1315,10 +1317,13 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE, &ccn->dt.node); + put_cpu(); return 0; error_pmu_register: error_set_affinity: + put_cpu(); +error_choose_name: ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); for (i = 0; i < ccn->num_xps; i++) writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL); @@ -1581,8 +1586,8 @@ static int __init arm_ccn_init(void) static void __exit arm_ccn_exit(void) { - cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE); platform_driver_unregister(&arm_ccn_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE); } module_init(arm_ccn_init); diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 328ca93781cf2691fb692b64e86ec4c203ca5b47..1b76d958590275daa6b9c2f8c69ecd2d51655da7 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -178,6 +178,7 @@ static struct bus_type sunxi_rsb_bus = { .match = sunxi_rsb_device_match, .probe = sunxi_rsb_device_probe, .remove = sunxi_rsb_device_remove, + .uevent = of_device_uevent_modalias, }; static void sunxi_rsb_dev_release(struct device *dev) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 779869ed32b1516261e80fffd440b3ca1e1132ea..71fad747c0c7c1052cc19ee3bad0568b4a80c55b 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -199,6 +199,9 @@ struct smi_info { /* The timer for this si. */ struct timer_list si_timer; + /* This flag is set, if the timer can be set */ + bool timer_can_start; + /* This flag is set, if the timer is running (timer_pending() isn't enough) */ bool timer_running; @@ -355,6 +358,8 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info) static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) { + if (!smi_info->timer_can_start) + return; smi_info->last_timeout_jiffies = jiffies; mod_timer(&smi_info->si_timer, new_val); smi_info->timer_running = true; @@ -374,21 +379,18 @@ static void start_new_msg(struct smi_info *smi_info, unsigned char *msg, smi_info->handlers->start_transaction(smi_info->si_sm, msg, size); } -static void start_check_enables(struct smi_info *smi_info, bool start_timer) +static void start_check_enables(struct smi_info *smi_info) { unsigned char msg[2]; msg[0] = (IPMI_NETFN_APP_REQUEST << 2); msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD; - if (start_timer) - start_new_msg(smi_info, msg, 2); - else - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); + start_new_msg(smi_info, msg, 2); smi_info->si_state = SI_CHECKING_ENABLES; } -static void start_clear_flags(struct smi_info *smi_info, bool start_timer) +static void start_clear_flags(struct smi_info *smi_info) { unsigned char msg[3]; @@ -397,10 +399,7 @@ static void start_clear_flags(struct smi_info *smi_info, bool start_timer) msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD; msg[2] = WDT_PRE_TIMEOUT_INT; - if (start_timer) - start_new_msg(smi_info, msg, 3); - else - smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3); + start_new_msg(smi_info, msg, 3); smi_info->si_state = SI_CLEARING_FLAGS; } @@ -435,11 +434,11 @@ static void start_getting_events(struct smi_info *smi_info) * Note that we cannot just use disable_irq(), since the interrupt may * be shared. */ -static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer) +static inline bool disable_si_irq(struct smi_info *smi_info) { if ((smi_info->io.irq) && (!smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = true; - start_check_enables(smi_info, start_timer); + start_check_enables(smi_info); return true; } return false; @@ -449,7 +448,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info) { if ((smi_info->io.irq) && (smi_info->interrupt_disabled)) { smi_info->interrupt_disabled = false; - start_check_enables(smi_info, true); + start_check_enables(smi_info); return true; } return false; @@ -467,7 +466,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info) msg = ipmi_alloc_smi_msg(); if (!msg) { - if (!disable_si_irq(smi_info, true)) + if (!disable_si_irq(smi_info)) smi_info->si_state = SI_NORMAL; } else if (enable_si_irq(smi_info)) { ipmi_free_smi_msg(msg); @@ -483,7 +482,7 @@ static void handle_flags(struct smi_info *smi_info) /* Watchdog pre-timeout */ smi_inc_stat(smi_info, watchdog_pretimeouts); - start_clear_flags(smi_info, true); + start_clear_flags(smi_info); smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT; if (smi_info->intf) ipmi_smi_watchdog_pretimeout(smi_info->intf); @@ -866,7 +865,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, * disable and messages disabled. */ if (smi_info->supports_event_msg_buff || smi_info->io.irq) { - start_check_enables(smi_info, true); + start_check_enables(smi_info); } else { smi_info->curr_msg = alloc_msg_handle_irq(smi_info); if (!smi_info->curr_msg) @@ -1167,6 +1166,7 @@ static int smi_start_processing(void *send_info, /* Set up the timer that drives the interface. */ timer_setup(&new_smi->si_timer, smi_timeout, 0); + new_smi->timer_can_start = true; smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES); /* Try to claim any interrupts. */ @@ -1936,10 +1936,12 @@ static void check_for_broken_irqs(struct smi_info *smi_info) check_set_rcv_irq(smi_info); } -static inline void wait_for_timer_and_thread(struct smi_info *smi_info) +static inline void stop_timer_and_thread(struct smi_info *smi_info) { if (smi_info->thread != NULL) kthread_stop(smi_info->thread); + + smi_info->timer_can_start = false; if (smi_info->timer_running) del_timer_sync(&smi_info->si_timer); } @@ -2152,7 +2154,7 @@ static int try_smi_init(struct smi_info *new_smi) * Start clearing the flags before we enable interrupts or the * timer to avoid racing with the timer. */ - start_clear_flags(new_smi, false); + start_clear_flags(new_smi); /* * IRQ is defined to be set when non-zero. req_events will @@ -2238,7 +2240,7 @@ static int try_smi_init(struct smi_info *new_smi) dev_set_drvdata(new_smi->io.dev, NULL); out_err_stop_timer: - wait_for_timer_and_thread(new_smi); + stop_timer_and_thread(new_smi); out_err: new_smi->interrupt_disabled = true; @@ -2388,7 +2390,7 @@ static void cleanup_one_si(struct smi_info *to_clean) */ if (to_clean->io.irq_cleanup) to_clean->io.irq_cleanup(&to_clean->io); - wait_for_timer_and_thread(to_clean); + stop_timer_and_thread(to_clean); /* * Timeouts are stopped, now make sure the interrupts are off @@ -2400,7 +2402,7 @@ static void cleanup_one_si(struct smi_info *to_clean) schedule_timeout_uninterruptible(1); } if (to_clean->handlers) - disable_si_irq(to_clean, false); + disable_si_irq(to_clean); while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) { poll(to_clean); schedule_timeout_uninterruptible(1); diff --git a/drivers/char/ipmi/ipmi_si_parisc.c b/drivers/char/ipmi/ipmi_si_parisc.c index 090b073ab441961f5c530dd9f453f1f1290e5a08..6b10f0e18a95d7f846edd78d22e392ec0e10cc69 100644 --- a/drivers/char/ipmi/ipmi_si_parisc.c +++ b/drivers/char/ipmi/ipmi_si_parisc.c @@ -10,6 +10,8 @@ static int __init ipmi_parisc_probe(struct parisc_device *dev) { struct si_sm_io io; + memset(&io, 0, sizeof(io)); + io.si_type = SI_KCS; io.addr_source = SI_DEVICETREE; io.addr_type = IPMI_MEM_ADDR_SPACE; diff --git a/drivers/char/ipmi/ipmi_si_pci.c b/drivers/char/ipmi/ipmi_si_pci.c index 99771f5cad07a7b25285f33cf0073739ca27e4ae..27dd11c49d2197aa098426ca2992565f75e45a32 100644 --- a/drivers/char/ipmi/ipmi_si_pci.c +++ b/drivers/char/ipmi/ipmi_si_pci.c @@ -103,10 +103,13 @@ static int ipmi_pci_probe(struct pci_dev *pdev, io.addr_source_cleanup = ipmi_pci_cleanup; io.addr_source_data = pdev; - if (pci_resource_flags(pdev, 0) & IORESOURCE_IO) + if (pci_resource_flags(pdev, 0) & IORESOURCE_IO) { io.addr_type = IPMI_IO_ADDR_SPACE; - else + io.io_setup = ipmi_si_port_setup; + } else { io.addr_type = IPMI_MEM_ADDR_SPACE; + io.io_setup = ipmi_si_mem_setup; + } io.addr_data = pci_resource_start(pdev, 0); io.regspacing = ipmi_pci_probe_regspacing(&io); diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 647d056df88c8dd2a7d8288e35fa2eeba9b7705b..b56c11f51bafad32bafc7b5b9c7467e7cd16632b 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -220,7 +220,8 @@ static bool clk_core_is_enabled(struct clk_core *core) ret = core->ops->is_enabled(core->hw); done: - clk_pm_runtime_put(core); + if (core->dev) + pm_runtime_put(core->dev); return ret; } @@ -1564,6 +1565,9 @@ static void clk_change_rate(struct clk_core *core) best_parent_rate = core->parent->rate; } + if (clk_pm_runtime_get(core)) + return; + if (core->flags & CLK_SET_RATE_UNGATE) { unsigned long flags; @@ -1634,6 +1638,8 @@ static void clk_change_rate(struct clk_core *core) /* handle the new child who might not be in core->children yet */ if (core->new_child) clk_change_rate(core->new_child); + + clk_pm_runtime_put(core); } static int clk_core_set_rate_nolock(struct clk_core *core, diff --git a/drivers/clk/sunxi/clk-sun9i-mmc.c b/drivers/clk/sunxi/clk-sun9i-mmc.c index a1a634253d6f2299bfad888b2fa193c98b4ac019..f00d8758ba24f6e5ed537a88be76ff85e5a7c5e4 100644 --- a/drivers/clk/sunxi/clk-sun9i-mmc.c +++ b/drivers/clk/sunxi/clk-sun9i-mmc.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev, return 0; } +static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev, + unsigned long id) +{ + sun9i_mmc_reset_assert(rcdev, id); + udelay(10); + sun9i_mmc_reset_deassert(rcdev, id); + + return 0; +} + static const struct reset_control_ops sun9i_mmc_reset_ops = { .assert = sun9i_mmc_reset_assert, .deassert = sun9i_mmc_reset_deassert, + .reset = sun9i_mmc_reset_reset, }; static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 58d4f4e1ad6a907991873a03027e6c7aa2f31fc4..ca38229b045ab288a2f250dddaf1b174e8c0572f 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -22,6 +22,8 @@ #include "cpufreq_governor.h" +#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL (2 * TICK_NSEC / NSEC_PER_USEC) + static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs); static DEFINE_MUTEX(gov_dbs_data_mutex); @@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf, { struct dbs_data *dbs_data = to_dbs_data(attr_set); struct policy_dbs_info *policy_dbs; + unsigned int sampling_interval; int ret; - ret = sscanf(buf, "%u", &dbs_data->sampling_rate); - if (ret != 1) + + ret = sscanf(buf, "%u", &sampling_interval); + if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL) return -EINVAL; + dbs_data->sampling_rate = sampling_interval; + /* * We are operating under dbs_data->mutex and so the list and its * entries can't be freed concurrently. @@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) if (ret) goto free_policy_dbs_info; - dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy); + /* + * The sampling interval should not be less than the transition latency + * of the CPU and it also cannot be too small for dbs_update() to work + * correctly. + */ + dbs_data->sampling_rate = max_t(unsigned int, + CPUFREQ_DBS_MIN_SAMPLING_INTERVAL, + cpufreq_policy_transition_delay_us(policy)); if (!have_governor_per_policy()) gov->gdbs_data = dbs_data; diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 628fe899cb483da9dbf0f7661b537734bc82f784..d9b2c2de49c43f125c91b382f818ff81d0ffc6ac 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev) val >>= OCOTP_CFG3_SPEED_SHIFT; val &= 0x3; - if ((val != OCOTP_CFG3_SPEED_1P2GHZ) && - of_machine_is_compatible("fsl,imx6q")) - if (dev_pm_opp_disable(dev, 1200000000)) - dev_warn(dev, "failed to disable 1.2GHz OPP\n"); if (val < OCOTP_CFG3_SPEED_996MHZ) if (dev_pm_opp_disable(dev, 996000000)) dev_warn(dev, "failed to disable 996MHz OPP\n"); - if (of_machine_is_compatible("fsl,imx6q")) { + + if (of_machine_is_compatible("fsl,imx6q") || + of_machine_is_compatible("fsl,imx6qp")) { if (val != OCOTP_CFG3_SPEED_852MHZ) if (dev_pm_opp_disable(dev, 852000000)) dev_warn(dev, "failed to disable 852MHz OPP\n"); + if (val != OCOTP_CFG3_SPEED_1P2GHZ) + if (dev_pm_opp_disable(dev, 1200000000)) + dev_warn(dev, "failed to disable 1.2GHz OPP\n"); } iounmap(base); put_node: diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig index 3e104f5aa0c2f9e05a3d7abd22dd6045ed420e9c..b56b3f711d9410a9ebddaa821428fd646ad02417 100644 --- a/drivers/crypto/chelsio/Kconfig +++ b/drivers/crypto/chelsio/Kconfig @@ -5,6 +5,7 @@ config CRYPTO_DEV_CHELSIO select CRYPTO_SHA256 select CRYPTO_SHA512 select CRYPTO_AUTHENC + select CRYPTO_GF128MUL ---help--- The Chelsio Crypto Co-processor driver for T6 adapters. diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 89ba9e85c0f377577c3e119bac5e7157372faed6..4bcef78a08aad241675e50d959aff2a6c58e126b 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -607,6 +607,7 @@ static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv ndesc = ctx->handle_result(priv, ring, sreq->req, &should_complete, &ret); if (ndesc < 0) { + kfree(sreq); dev_err(priv->dev, "failed to handle result (%d)", ndesc); return; } diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c index 5438552bc6d783b57a23763e64c59afb90c340ae..fcc0a606d74839bb46f2af6d0ac66bdb12b3d511 100644 --- a/drivers/crypto/inside-secure/safexcel_cipher.c +++ b/drivers/crypto/inside-secure/safexcel_cipher.c @@ -14,6 +14,7 @@ #include #include +#include #include "safexcel.h" @@ -33,6 +34,10 @@ struct safexcel_cipher_ctx { unsigned int key_len; }; +struct safexcel_cipher_req { + bool needs_inv; +}; + static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx, struct crypto_async_request *async, struct safexcel_command_desc *cdesc, @@ -126,9 +131,9 @@ static int safexcel_context_control(struct safexcel_cipher_ctx *ctx, return 0; } -static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, - struct crypto_async_request *async, - bool *should_complete, int *ret) +static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) { struct skcipher_request *req = skcipher_request_cast(async); struct safexcel_result_desc *rdesc; @@ -265,7 +270,6 @@ static int safexcel_aes_send(struct crypto_async_request *async, spin_unlock_bh(&priv->ring[ring].egress_lock); request->req = &req->base; - ctx->base.handle_result = safexcel_handle_result; *commands = n_cdesc; *results = n_rdesc; @@ -341,8 +345,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, ring = safexcel_select_ring(priv); ctx->base.ring = ring; - ctx->base.needs_inv = false; - ctx->base.send = safexcel_aes_send; spin_lock_bh(&priv->ring[ring].queue_lock); enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async); @@ -359,6 +361,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, return ndesc; } +static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) +{ + struct skcipher_request *req = skcipher_request_cast(async); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); + int err; + + if (sreq->needs_inv) { + sreq->needs_inv = false; + err = safexcel_handle_inv_result(priv, ring, async, + should_complete, ret); + } else { + err = safexcel_handle_req_result(priv, ring, async, + should_complete, ret); + } + + return err; +} + static int safexcel_cipher_send_inv(struct crypto_async_request *async, int ring, struct safexcel_request *request, int *commands, int *results) @@ -368,8 +390,6 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async, struct safexcel_crypto_priv *priv = ctx->priv; int ret; - ctx->base.handle_result = safexcel_handle_inv_result; - ret = safexcel_invalidate_cache(async, &ctx->base, priv, ctx->base.ctxr_dma, ring, request); if (unlikely(ret)) @@ -381,28 +401,46 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async, return 0; } +static int safexcel_send(struct crypto_async_request *async, + int ring, struct safexcel_request *request, + int *commands, int *results) +{ + struct skcipher_request *req = skcipher_request_cast(async); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); + int ret; + + if (sreq->needs_inv) + ret = safexcel_cipher_send_inv(async, ring, request, + commands, results); + else + ret = safexcel_aes_send(async, ring, request, + commands, results); + return ret; +} + static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm) { struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->priv; - struct skcipher_request req; + SKCIPHER_REQUEST_ON_STACK(req, __crypto_skcipher_cast(tfm)); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); struct safexcel_inv_result result = {}; int ring = ctx->base.ring; - memset(&req, 0, sizeof(struct skcipher_request)); + memset(req, 0, sizeof(struct skcipher_request)); /* create invalidation request */ init_completion(&result.completion); - skcipher_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG, - safexcel_inv_complete, &result); + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + safexcel_inv_complete, &result); - skcipher_request_set_tfm(&req, __crypto_skcipher_cast(tfm)); - ctx = crypto_tfm_ctx(req.base.tfm); + skcipher_request_set_tfm(req, __crypto_skcipher_cast(tfm)); + ctx = crypto_tfm_ctx(req->base.tfm); ctx->base.exit_inv = true; - ctx->base.send = safexcel_cipher_send_inv; + sreq->needs_inv = true; spin_lock_bh(&priv->ring[ring].queue_lock); - crypto_enqueue_request(&priv->ring[ring].queue, &req.base); + crypto_enqueue_request(&priv->ring[ring].queue, &req->base); spin_unlock_bh(&priv->ring[ring].queue_lock); if (!priv->ring[ring].need_dequeue) @@ -424,19 +462,21 @@ static int safexcel_aes(struct skcipher_request *req, enum safexcel_cipher_direction dir, u32 mode) { struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct safexcel_cipher_req *sreq = skcipher_request_ctx(req); struct safexcel_crypto_priv *priv = ctx->priv; int ret, ring; + sreq->needs_inv = false; ctx->direction = dir; ctx->mode = mode; if (ctx->base.ctxr) { - if (ctx->base.needs_inv) - ctx->base.send = safexcel_cipher_send_inv; + if (ctx->base.needs_inv) { + sreq->needs_inv = true; + ctx->base.needs_inv = false; + } } else { ctx->base.ring = safexcel_select_ring(priv); - ctx->base.send = safexcel_aes_send; - ctx->base.ctxr = dma_pool_zalloc(priv->context_pool, EIP197_GFP_FLAGS(req->base), &ctx->base.ctxr_dma); @@ -476,6 +516,11 @@ static int safexcel_skcipher_cra_init(struct crypto_tfm *tfm) alg.skcipher.base); ctx->priv = tmpl->priv; + ctx->base.send = safexcel_send; + ctx->base.handle_result = safexcel_handle_result; + + crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm), + sizeof(struct safexcel_cipher_req)); return 0; } diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 74feb622710147baf79d49c25a119be7e640ac7a..0c5a5820b06e53cc9efa776c37822730af1421e4 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -32,9 +32,10 @@ struct safexcel_ahash_req { bool last_req; bool finish; bool hmac; + bool needs_inv; u8 state_sz; /* expected sate size, only set once */ - u32 state[SHA256_DIGEST_SIZE / sizeof(u32)]; + u32 state[SHA256_DIGEST_SIZE / sizeof(u32)] __aligned(sizeof(u32)); u64 len; u64 processed; @@ -119,15 +120,15 @@ static void safexcel_context_control(struct safexcel_ahash_ctx *ctx, } } -static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, - struct crypto_async_request *async, - bool *should_complete, int *ret) +static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) { struct safexcel_result_desc *rdesc; struct ahash_request *areq = ahash_request_cast(async); struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq); struct safexcel_ahash_req *sreq = ahash_request_ctx(areq); - int cache_len, result_sz = sreq->state_sz; + int cache_len; *ret = 0; @@ -148,8 +149,8 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, spin_unlock_bh(&priv->ring[ring].egress_lock); if (sreq->finish) - result_sz = crypto_ahash_digestsize(ahash); - memcpy(sreq->state, areq->result, result_sz); + memcpy(areq->result, sreq->state, + crypto_ahash_digestsize(ahash)); dma_unmap_sg(priv->dev, areq->src, sg_nents_for_len(areq->src, areq->nbytes), DMA_TO_DEVICE); @@ -165,9 +166,9 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, return 1; } -static int safexcel_ahash_send(struct crypto_async_request *async, int ring, - struct safexcel_request *request, int *commands, - int *results) +static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring, + struct safexcel_request *request, + int *commands, int *results) { struct ahash_request *areq = ahash_request_cast(async); struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq); @@ -273,7 +274,7 @@ static int safexcel_ahash_send(struct crypto_async_request *async, int ring, /* Add the token */ safexcel_hash_token(first_cdesc, len, req->state_sz); - ctx->base.result_dma = dma_map_single(priv->dev, areq->result, + ctx->base.result_dma = dma_map_single(priv->dev, req->state, req->state_sz, DMA_FROM_DEVICE); if (dma_mapping_error(priv->dev, ctx->base.result_dma)) { ret = -EINVAL; @@ -292,7 +293,6 @@ static int safexcel_ahash_send(struct crypto_async_request *async, int ring, req->processed += len; request->req = &areq->base; - ctx->base.handle_result = safexcel_handle_result; *commands = n_cdesc; *results = 1; @@ -374,8 +374,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, ring = safexcel_select_ring(priv); ctx->base.ring = ring; - ctx->base.needs_inv = false; - ctx->base.send = safexcel_ahash_send; spin_lock_bh(&priv->ring[ring].queue_lock); enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async); @@ -392,6 +390,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv, return 1; } +static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, + struct crypto_async_request *async, + bool *should_complete, int *ret) +{ + struct ahash_request *areq = ahash_request_cast(async); + struct safexcel_ahash_req *req = ahash_request_ctx(areq); + int err; + + if (req->needs_inv) { + req->needs_inv = false; + err = safexcel_handle_inv_result(priv, ring, async, + should_complete, ret); + } else { + err = safexcel_handle_req_result(priv, ring, async, + should_complete, ret); + } + + return err; +} + static int safexcel_ahash_send_inv(struct crypto_async_request *async, int ring, struct safexcel_request *request, int *commands, int *results) @@ -400,7 +418,6 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async, struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); int ret; - ctx->base.handle_result = safexcel_handle_inv_result; ret = safexcel_invalidate_cache(async, &ctx->base, ctx->priv, ctx->base.ctxr_dma, ring, request); if (unlikely(ret)) @@ -412,28 +429,46 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async, return 0; } +static int safexcel_ahash_send(struct crypto_async_request *async, + int ring, struct safexcel_request *request, + int *commands, int *results) +{ + struct ahash_request *areq = ahash_request_cast(async); + struct safexcel_ahash_req *req = ahash_request_ctx(areq); + int ret; + + if (req->needs_inv) + ret = safexcel_ahash_send_inv(async, ring, request, + commands, results); + else + ret = safexcel_ahash_send_req(async, ring, request, + commands, results); + return ret; +} + static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) { struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->priv; - struct ahash_request req; + AHASH_REQUEST_ON_STACK(req, __crypto_ahash_cast(tfm)); + struct safexcel_ahash_req *rctx = ahash_request_ctx(req); struct safexcel_inv_result result = {}; int ring = ctx->base.ring; - memset(&req, 0, sizeof(struct ahash_request)); + memset(req, 0, sizeof(struct ahash_request)); /* create invalidation request */ init_completion(&result.completion); - ahash_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG, + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, safexcel_inv_complete, &result); - ahash_request_set_tfm(&req, __crypto_ahash_cast(tfm)); - ctx = crypto_tfm_ctx(req.base.tfm); + ahash_request_set_tfm(req, __crypto_ahash_cast(tfm)); + ctx = crypto_tfm_ctx(req->base.tfm); ctx->base.exit_inv = true; - ctx->base.send = safexcel_ahash_send_inv; + rctx->needs_inv = true; spin_lock_bh(&priv->ring[ring].queue_lock); - crypto_enqueue_request(&priv->ring[ring].queue, &req.base); + crypto_enqueue_request(&priv->ring[ring].queue, &req->base); spin_unlock_bh(&priv->ring[ring].queue_lock); if (!priv->ring[ring].need_dequeue) @@ -481,14 +516,16 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq) struct safexcel_crypto_priv *priv = ctx->priv; int ret, ring; - ctx->base.send = safexcel_ahash_send; + req->needs_inv = false; if (req->processed && ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED) ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq); if (ctx->base.ctxr) { - if (ctx->base.needs_inv) - ctx->base.send = safexcel_ahash_send_inv; + if (ctx->base.needs_inv) { + ctx->base.needs_inv = false; + req->needs_inv = true; + } } else { ctx->base.ring = safexcel_select_ring(priv); ctx->base.ctxr = dma_pool_zalloc(priv->context_pool, @@ -622,6 +659,8 @@ static int safexcel_ahash_cra_init(struct crypto_tfm *tfm) struct safexcel_alg_template, alg.ahash); ctx->priv = tmpl->priv; + ctx->base.send = safexcel_ahash_send; + ctx->base.handle_result = safexcel_handle_result; crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct safexcel_ahash_req)); diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 48de52cf2ecc10f7a1a33b1fa0025e94ec706a19..662e709812cc6f79e973fa10f71ad8f6d1991193 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1625,6 +1625,7 @@ static int queue_cache_init(void) CWQ_ENTRY_SIZE, 0, NULL); if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) { kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); + queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL; return -ENOMEM; } return 0; @@ -1634,6 +1635,8 @@ static void queue_cache_destroy(void) { kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]); kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]); + queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL; + queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL; } static long spu_queue_register_workfn(void *arg) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index fbab271b3bf9f9506c86579c75ebe32fc3235228..a861b5b4d4437d6b3be7dcf5e9d0b3475205455d 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -708,7 +708,7 @@ atc_prep_dma_interleaved(struct dma_chan *chan, unsigned long flags) { struct at_dma_chan *atchan = to_at_dma_chan(chan); - struct data_chunk *first = xt->sgl; + struct data_chunk *first; struct at_desc *desc = NULL; size_t xfer_count; unsigned int dwidth; @@ -720,6 +720,8 @@ atc_prep_dma_interleaved(struct dma_chan *chan, if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) return NULL; + first = xt->sgl; + dev_info(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n", __func__, &xt->src_start, &xt->dst_start, xt->numf, diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c index d50273fed715096ac625382f6c511f537da57bf4..afd5e10f8927cb0c5573bb946a48755aad58b0aa 100644 --- a/drivers/dma/dma-jz4740.c +++ b/drivers/dma/dma-jz4740.c @@ -555,7 +555,7 @@ static int jz4740_dma_probe(struct platform_device *pdev) ret = dma_async_device_register(dd); if (ret) - return ret; + goto err_clk; irq = platform_get_irq(pdev, 0); ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev); @@ -568,6 +568,8 @@ static int jz4740_dma_probe(struct platform_device *pdev) err_unregister: dma_async_device_unregister(dd); +err_clk: + clk_disable_unprepare(dmadev->clk); return ret; } diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 47edc7fbf91f52e5259060824c38eaab69ebdb56..ec5f9d2bc8202f340c615cbe43731016d316d547 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -155,6 +155,12 @@ MODULE_PARM_DESC(run, "Run the test (default: false)"); #define PATTERN_COUNT_MASK 0x1f #define PATTERN_MEMSET_IDX 0x01 +/* poor man's completion - we want to use wait_event_freezable() on it */ +struct dmatest_done { + bool done; + wait_queue_head_t *wait; +}; + struct dmatest_thread { struct list_head node; struct dmatest_info *info; @@ -165,6 +171,8 @@ struct dmatest_thread { u8 **dsts; u8 **udsts; enum dma_transaction_type type; + wait_queue_head_t done_wait; + struct dmatest_done test_done; bool done; }; @@ -342,18 +350,25 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start, return error_count; } -/* poor man's completion - we want to use wait_event_freezable() on it */ -struct dmatest_done { - bool done; - wait_queue_head_t *wait; -}; static void dmatest_callback(void *arg) { struct dmatest_done *done = arg; - - done->done = true; - wake_up_all(done->wait); + struct dmatest_thread *thread = + container_of(arg, struct dmatest_thread, done_wait); + if (!thread->done) { + done->done = true; + wake_up_all(done->wait); + } else { + /* + * If thread->done, it means that this callback occurred + * after the parent thread has cleaned up. This can + * happen in the case that driver doesn't implement + * the terminate_all() functionality and a dma operation + * did not occur within the timeout period + */ + WARN(1, "dmatest: Kernel memory may be corrupted!!\n"); + } } static unsigned int min_odd(unsigned int x, unsigned int y) @@ -424,9 +439,8 @@ static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len) */ static int dmatest_func(void *data) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait); struct dmatest_thread *thread = data; - struct dmatest_done done = { .wait = &done_wait }; + struct dmatest_done *done = &thread->test_done; struct dmatest_info *info; struct dmatest_params *params; struct dma_chan *chan; @@ -673,9 +687,9 @@ static int dmatest_func(void *data) continue; } - done.done = false; + done->done = false; tx->callback = dmatest_callback; - tx->callback_param = &done; + tx->callback_param = done; cookie = tx->tx_submit(tx); if (dma_submit_error(cookie)) { @@ -688,21 +702,12 @@ static int dmatest_func(void *data) } dma_async_issue_pending(chan); - wait_event_freezable_timeout(done_wait, done.done, + wait_event_freezable_timeout(thread->done_wait, done->done, msecs_to_jiffies(params->timeout)); status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); - if (!done.done) { - /* - * We're leaving the timed out dma operation with - * dangling pointer to done_wait. To make this - * correct, we'll need to allocate wait_done for - * each test iteration and perform "who's gonna - * free it this time?" dancing. For now, just - * leave it dangling. - */ - WARN(1, "dmatest: Kernel stack may be corrupted!!\n"); + if (!done->done) { dmaengine_unmap_put(um); result("test timed out", total_tests, src_off, dst_off, len, 0); @@ -789,7 +794,7 @@ static int dmatest_func(void *data) dmatest_KBs(runtime, total_len), ret); /* terminate all transfers on specified channels */ - if (ret) + if (ret || failed_tests) dmaengine_terminate_all(chan); thread->done = true; @@ -849,6 +854,8 @@ static int dmatest_add_threads(struct dmatest_info *info, thread->info = info; thread->chan = dtc->chan; thread->type = type; + thread->test_done.wait = &thread->done_wait; + init_waitqueue_head(&thread->done_wait); smp_wmb(); thread->task = kthread_create(dmatest_func, thread, "%s-%s%u", dma_chan_name(chan), op, i); diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c index 6775f2c74e25b7269417bbe001adfb03698dea97..c7568869284e17d4b63379b236a0f30391640820 100644 --- a/drivers/dma/fsl-edma.c +++ b/drivers/dma/fsl-edma.c @@ -863,11 +863,11 @@ static void fsl_edma_irq_exit( } } -static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma) +static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma, int nr_clocks) { int i; - for (i = 0; i < DMAMUX_NR; i++) + for (i = 0; i < nr_clocks; i++) clk_disable_unprepare(fsl_edma->muxclk[i]); } @@ -904,25 +904,25 @@ static int fsl_edma_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 1 + i); fsl_edma->muxbase[i] = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(fsl_edma->muxbase[i])) + if (IS_ERR(fsl_edma->muxbase[i])) { + /* on error: disable all previously enabled clks */ + fsl_disable_clocks(fsl_edma, i); return PTR_ERR(fsl_edma->muxbase[i]); + } sprintf(clkname, "dmamux%d", i); fsl_edma->muxclk[i] = devm_clk_get(&pdev->dev, clkname); if (IS_ERR(fsl_edma->muxclk[i])) { dev_err(&pdev->dev, "Missing DMAMUX block clock.\n"); + /* on error: disable all previously enabled clks */ + fsl_disable_clocks(fsl_edma, i); return PTR_ERR(fsl_edma->muxclk[i]); } ret = clk_prepare_enable(fsl_edma->muxclk[i]); - if (ret) { - /* disable only clks which were enabled on error */ - for (; i >= 0; i--) - clk_disable_unprepare(fsl_edma->muxclk[i]); - - dev_err(&pdev->dev, "DMAMUX clk block failed.\n"); - return ret; - } + if (ret) + /* on error: disable all previously enabled clks */ + fsl_disable_clocks(fsl_edma, i); } @@ -976,7 +976,7 @@ static int fsl_edma_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Can't register Freescale eDMA engine. (%d)\n", ret); - fsl_disable_clocks(fsl_edma); + fsl_disable_clocks(fsl_edma, DMAMUX_NR); return ret; } @@ -985,7 +985,7 @@ static int fsl_edma_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Can't register Freescale eDMA of_dma. (%d)\n", ret); dma_async_device_unregister(&fsl_edma->dma_dev); - fsl_disable_clocks(fsl_edma); + fsl_disable_clocks(fsl_edma, DMAMUX_NR); return ret; } @@ -1015,7 +1015,7 @@ static int fsl_edma_remove(struct platform_device *pdev) fsl_edma_cleanup_vchan(&fsl_edma->dma_dev); of_dma_controller_free(np); dma_async_device_unregister(&fsl_edma->dma_dev); - fsl_disable_clocks(fsl_edma); + fsl_disable_clocks(fsl_edma, DMAMUX_NR); return 0; } diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 2f31d3d0caa61821aa08aea360e06709bdb25d48..7792a9186f9cf35bae71792e5e0783cf53364b05 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -390,7 +390,7 @@ static int ioat_dma_self_test(struct ioatdma_device *ioat_dma) if (memcmp(src, dest, IOAT_TEST_SIZE)) { dev_err(dev, "Self-test copy failed compare, disabling\n"); err = -ENODEV; - goto free_resources; + goto unmap_dma; } unmap_dma: diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c index dfb373c8ba2a49629628dd33f1a36fd41a270b7b..7da9f1b83ebecf3641da1e87f35e5e9ec6621df8 100644 --- a/drivers/firmware/arm_scpi.c +++ b/drivers/firmware/arm_scpi.c @@ -28,7 +28,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include @@ -73,13 +72,21 @@ #define MAX_DVFS_DOMAINS 8 #define MAX_DVFS_OPPS 16 - -#define PROTO_REV_MAJOR_MASK GENMASK(31, 16) -#define PROTO_REV_MINOR_MASK GENMASK(15, 0) - -#define FW_REV_MAJOR_MASK GENMASK(31, 24) -#define FW_REV_MINOR_MASK GENMASK(23, 16) -#define FW_REV_PATCH_MASK GENMASK(15, 0) +#define DVFS_LATENCY(hdr) (le32_to_cpu(hdr) >> 16) +#define DVFS_OPP_COUNT(hdr) ((le32_to_cpu(hdr) >> 8) & 0xff) + +#define PROTOCOL_REV_MINOR_BITS 16 +#define PROTOCOL_REV_MINOR_MASK ((1U << PROTOCOL_REV_MINOR_BITS) - 1) +#define PROTOCOL_REV_MAJOR(x) ((x) >> PROTOCOL_REV_MINOR_BITS) +#define PROTOCOL_REV_MINOR(x) ((x) & PROTOCOL_REV_MINOR_MASK) + +#define FW_REV_MAJOR_BITS 24 +#define FW_REV_MINOR_BITS 16 +#define FW_REV_PATCH_MASK ((1U << FW_REV_MINOR_BITS) - 1) +#define FW_REV_MINOR_MASK ((1U << FW_REV_MAJOR_BITS) - 1) +#define FW_REV_MAJOR(x) ((x) >> FW_REV_MAJOR_BITS) +#define FW_REV_MINOR(x) (((x) & FW_REV_MINOR_MASK) >> FW_REV_MINOR_BITS) +#define FW_REV_PATCH(x) ((x) & FW_REV_PATCH_MASK) #define MAX_RX_TIMEOUT (msecs_to_jiffies(30)) @@ -304,6 +311,10 @@ struct clk_get_info { u8 name[20]; } __packed; +struct clk_get_value { + __le32 rate; +} __packed; + struct clk_set_value { __le16 id; __le16 reserved; @@ -317,9 +328,7 @@ struct legacy_clk_set_value { } __packed; struct dvfs_info { - u8 domain; - u8 opp_count; - __le16 latency; + __le32 header; struct { __le32 freq; __le32 m_volt; @@ -342,6 +351,11 @@ struct _scpi_sensor_info { char name[20]; }; +struct sensor_value { + __le32 lo_val; + __le32 hi_val; +} __packed; + struct dev_pstate_set { __le16 dev_id; u8 pstate; @@ -405,20 +419,19 @@ static void scpi_process_cmd(struct scpi_chan *ch, u32 cmd) unsigned int len; if (scpi_info->is_legacy) { - struct legacy_scpi_shared_mem __iomem *mem = - ch->rx_payload; + struct legacy_scpi_shared_mem *mem = ch->rx_payload; /* RX Length is not replied by the legacy Firmware */ len = match->rx_len; - match->status = ioread32(&mem->status); + match->status = le32_to_cpu(mem->status); memcpy_fromio(match->rx_buf, mem->payload, len); } else { - struct scpi_shared_mem __iomem *mem = ch->rx_payload; + struct scpi_shared_mem *mem = ch->rx_payload; len = min(match->rx_len, CMD_SIZE(cmd)); - match->status = ioread32(&mem->status); + match->status = le32_to_cpu(mem->status); memcpy_fromio(match->rx_buf, mem->payload, len); } @@ -432,11 +445,11 @@ static void scpi_process_cmd(struct scpi_chan *ch, u32 cmd) static void scpi_handle_remote_msg(struct mbox_client *c, void *msg) { struct scpi_chan *ch = container_of(c, struct scpi_chan, cl); - struct scpi_shared_mem __iomem *mem = ch->rx_payload; + struct scpi_shared_mem *mem = ch->rx_payload; u32 cmd = 0; if (!scpi_info->is_legacy) - cmd = ioread32(&mem->command); + cmd = le32_to_cpu(mem->command); scpi_process_cmd(ch, cmd); } @@ -446,7 +459,7 @@ static void scpi_tx_prepare(struct mbox_client *c, void *msg) unsigned long flags; struct scpi_xfer *t = msg; struct scpi_chan *ch = container_of(c, struct scpi_chan, cl); - struct scpi_shared_mem __iomem *mem = ch->tx_payload; + struct scpi_shared_mem *mem = (struct scpi_shared_mem *)ch->tx_payload; if (t->tx_buf) { if (scpi_info->is_legacy) @@ -465,7 +478,7 @@ static void scpi_tx_prepare(struct mbox_client *c, void *msg) } if (!scpi_info->is_legacy) - iowrite32(t->cmd, &mem->command); + mem->command = cpu_to_le32(t->cmd); } static struct scpi_xfer *get_scpi_xfer(struct scpi_chan *ch) @@ -570,13 +583,13 @@ scpi_clk_get_range(u16 clk_id, unsigned long *min, unsigned long *max) static unsigned long scpi_clk_get_val(u16 clk_id) { int ret; - __le32 rate; + struct clk_get_value clk; __le16 le_clk_id = cpu_to_le16(clk_id); ret = scpi_send_message(CMD_GET_CLOCK_VALUE, &le_clk_id, - sizeof(le_clk_id), &rate, sizeof(rate)); + sizeof(le_clk_id), &clk, sizeof(clk)); - return ret ? ret : le32_to_cpu(rate); + return ret ? ret : le32_to_cpu(clk.rate); } static int scpi_clk_set_val(u16 clk_id, unsigned long rate) @@ -631,35 +644,35 @@ static int opp_cmp_func(const void *opp1, const void *opp2) } static struct scpi_dvfs_info *scpi_dvfs_get_info(u8 domain) -{ - if (domain >= MAX_DVFS_DOMAINS) - return ERR_PTR(-EINVAL); - - return scpi_info->dvfs[domain] ?: ERR_PTR(-EINVAL); -} - -static int scpi_dvfs_populate_info(struct device *dev, u8 domain) { struct scpi_dvfs_info *info; struct scpi_opp *opp; struct dvfs_info buf; int ret, i; + if (domain >= MAX_DVFS_DOMAINS) + return ERR_PTR(-EINVAL); + + if (scpi_info->dvfs[domain]) /* data already populated */ + return scpi_info->dvfs[domain]; + ret = scpi_send_message(CMD_GET_DVFS_INFO, &domain, sizeof(domain), &buf, sizeof(buf)); if (ret) - return ret; + return ERR_PTR(ret); - info = devm_kmalloc(dev, sizeof(*info), GFP_KERNEL); + info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - info->count = buf.opp_count; - info->latency = le16_to_cpu(buf.latency) * 1000; /* uS to nS */ + info->count = DVFS_OPP_COUNT(buf.header); + info->latency = DVFS_LATENCY(buf.header) * 1000; /* uS to nS */ - info->opps = devm_kcalloc(dev, info->count, sizeof(*opp), GFP_KERNEL); - if (!info->opps) - return -ENOMEM; + info->opps = kcalloc(info->count, sizeof(*opp), GFP_KERNEL); + if (!info->opps) { + kfree(info); + return ERR_PTR(-ENOMEM); + } for (i = 0, opp = info->opps; i < info->count; i++, opp++) { opp->freq = le32_to_cpu(buf.opps[i].freq); @@ -669,15 +682,7 @@ static int scpi_dvfs_populate_info(struct device *dev, u8 domain) sort(info->opps, info->count, sizeof(*opp), opp_cmp_func, NULL); scpi_info->dvfs[domain] = info; - return 0; -} - -static void scpi_dvfs_populate(struct device *dev) -{ - int domain; - - for (domain = 0; domain < MAX_DVFS_DOMAINS; domain++) - scpi_dvfs_populate_info(dev, domain); + return info; } static int scpi_dev_domain_id(struct device *dev) @@ -708,6 +713,9 @@ static int scpi_dvfs_get_transition_latency(struct device *dev) if (IS_ERR(info)) return PTR_ERR(info); + if (!info->latency) + return 0; + return info->latency; } @@ -768,19 +776,20 @@ static int scpi_sensor_get_info(u16 sensor_id, struct scpi_sensor_info *info) static int scpi_sensor_get_value(u16 sensor, u64 *val) { __le16 id = cpu_to_le16(sensor); - __le64 value; + struct sensor_value buf; int ret; ret = scpi_send_message(CMD_SENSOR_VALUE, &id, sizeof(id), - &value, sizeof(value)); + &buf, sizeof(buf)); if (ret) return ret; if (scpi_info->is_legacy) - /* only 32-bits supported, upper 32 bits can be junk */ - *val = le32_to_cpup((__le32 *)&value); + /* only 32-bits supported, hi_val can be junk */ + *val = le32_to_cpu(buf.lo_val); else - *val = le64_to_cpu(value); + *val = (u64)le32_to_cpu(buf.hi_val) << 32 | + le32_to_cpu(buf.lo_val); return 0; } @@ -853,19 +862,23 @@ static int scpi_init_versions(struct scpi_drvinfo *info) static ssize_t protocol_version_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%lu.%lu\n", - FIELD_GET(PROTO_REV_MAJOR_MASK, scpi_info->protocol_version), - FIELD_GET(PROTO_REV_MINOR_MASK, scpi_info->protocol_version)); + struct scpi_drvinfo *scpi_info = dev_get_drvdata(dev); + + return sprintf(buf, "%d.%d\n", + PROTOCOL_REV_MAJOR(scpi_info->protocol_version), + PROTOCOL_REV_MINOR(scpi_info->protocol_version)); } static DEVICE_ATTR_RO(protocol_version); static ssize_t firmware_version_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%lu.%lu.%lu\n", - FIELD_GET(FW_REV_MAJOR_MASK, scpi_info->firmware_version), - FIELD_GET(FW_REV_MINOR_MASK, scpi_info->firmware_version), - FIELD_GET(FW_REV_PATCH_MASK, scpi_info->firmware_version)); + struct scpi_drvinfo *scpi_info = dev_get_drvdata(dev); + + return sprintf(buf, "%d.%d.%d\n", + FW_REV_MAJOR(scpi_info->firmware_version), + FW_REV_MINOR(scpi_info->firmware_version), + FW_REV_PATCH(scpi_info->firmware_version)); } static DEVICE_ATTR_RO(firmware_version); @@ -876,13 +889,39 @@ static struct attribute *versions_attrs[] = { }; ATTRIBUTE_GROUPS(versions); -static void scpi_free_channels(void *data) +static void +scpi_free_channels(struct device *dev, struct scpi_chan *pchan, int count) { - struct scpi_drvinfo *info = data; int i; - for (i = 0; i < info->num_chans; i++) - mbox_free_channel(info->channels[i].chan); + for (i = 0; i < count && pchan->chan; i++, pchan++) { + mbox_free_channel(pchan->chan); + devm_kfree(dev, pchan->xfers); + devm_iounmap(dev, pchan->rx_payload); + } +} + +static int scpi_remove(struct platform_device *pdev) +{ + int i; + struct device *dev = &pdev->dev; + struct scpi_drvinfo *info = platform_get_drvdata(pdev); + + scpi_info = NULL; /* stop exporting SCPI ops through get_scpi_ops */ + + of_platform_depopulate(dev); + sysfs_remove_groups(&dev->kobj, versions_groups); + scpi_free_channels(dev, info->channels, info->num_chans); + platform_set_drvdata(pdev, NULL); + + for (i = 0; i < MAX_DVFS_DOMAINS && info->dvfs[i]; i++) { + kfree(info->dvfs[i]->opps); + kfree(info->dvfs[i]); + } + devm_kfree(dev, info->channels); + devm_kfree(dev, info); + + return 0; } #define MAX_SCPI_XFERS 10 @@ -913,6 +952,7 @@ static int scpi_probe(struct platform_device *pdev) { int count, idx, ret; struct resource res; + struct scpi_chan *scpi_chan; struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; @@ -929,19 +969,13 @@ static int scpi_probe(struct platform_device *pdev) return -ENODEV; } - scpi_info->channels = devm_kcalloc(dev, count, sizeof(struct scpi_chan), - GFP_KERNEL); - if (!scpi_info->channels) + scpi_chan = devm_kcalloc(dev, count, sizeof(*scpi_chan), GFP_KERNEL); + if (!scpi_chan) return -ENOMEM; - ret = devm_add_action(dev, scpi_free_channels, scpi_info); - if (ret) - return ret; - - for (; scpi_info->num_chans < count; scpi_info->num_chans++) { + for (idx = 0; idx < count; idx++) { resource_size_t size; - int idx = scpi_info->num_chans; - struct scpi_chan *pchan = scpi_info->channels + idx; + struct scpi_chan *pchan = scpi_chan + idx; struct mbox_client *cl = &pchan->cl; struct device_node *shmem = of_parse_phandle(np, "shmem", idx); @@ -949,14 +983,15 @@ static int scpi_probe(struct platform_device *pdev) of_node_put(shmem); if (ret) { dev_err(dev, "failed to get SCPI payload mem resource\n"); - return ret; + goto err; } size = resource_size(&res); pchan->rx_payload = devm_ioremap(dev, res.start, size); if (!pchan->rx_payload) { dev_err(dev, "failed to ioremap SCPI payload\n"); - return -EADDRNOTAVAIL; + ret = -EADDRNOTAVAIL; + goto err; } pchan->tx_payload = pchan->rx_payload + (size >> 1); @@ -982,11 +1017,17 @@ static int scpi_probe(struct platform_device *pdev) dev_err(dev, "failed to get channel%d err %d\n", idx, ret); } +err: + scpi_free_channels(dev, scpi_chan, idx); + scpi_info = NULL; return ret; } + scpi_info->channels = scpi_chan; + scpi_info->num_chans = count; scpi_info->commands = scpi_std_commands; - scpi_info->scpi_ops = &scpi_ops; + + platform_set_drvdata(pdev, scpi_info); if (scpi_info->is_legacy) { /* Replace with legacy variants */ @@ -1002,23 +1043,23 @@ static int scpi_probe(struct platform_device *pdev) ret = scpi_init_versions(scpi_info); if (ret) { dev_err(dev, "incorrect or no SCP firmware found\n"); + scpi_remove(pdev); return ret; } - scpi_dvfs_populate(dev); - - _dev_info(dev, "SCP Protocol %lu.%lu Firmware %lu.%lu.%lu version\n", - FIELD_GET(PROTO_REV_MAJOR_MASK, scpi_info->protocol_version), - FIELD_GET(PROTO_REV_MINOR_MASK, scpi_info->protocol_version), - FIELD_GET(FW_REV_MAJOR_MASK, scpi_info->firmware_version), - FIELD_GET(FW_REV_MINOR_MASK, scpi_info->firmware_version), - FIELD_GET(FW_REV_PATCH_MASK, scpi_info->firmware_version)); + _dev_info(dev, "SCP Protocol %d.%d Firmware %d.%d.%d version\n", + PROTOCOL_REV_MAJOR(scpi_info->protocol_version), + PROTOCOL_REV_MINOR(scpi_info->protocol_version), + FW_REV_MAJOR(scpi_info->firmware_version), + FW_REV_MINOR(scpi_info->firmware_version), + FW_REV_PATCH(scpi_info->firmware_version)); + scpi_info->scpi_ops = &scpi_ops; - ret = devm_device_add_groups(dev, versions_groups); + ret = sysfs_create_groups(&dev->kobj, versions_groups); if (ret) dev_err(dev, "unable to create sysfs version group\n"); - return devm_of_platform_populate(dev); + return of_platform_populate(dev->of_node, NULL, NULL, dev); } static const struct of_device_id scpi_of_match[] = { @@ -1035,6 +1076,7 @@ static struct platform_driver scpi_driver = { .of_match_table = scpi_of_match, }, .probe = scpi_probe, + .remove = scpi_remove, }; module_platform_driver(scpi_driver); diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index ec8ac5c4dd84f93e386db9871946c81ea1dbc8d5..055e2e8f985a3fbc5b334f8c0d414c8b6c51e3e3 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -20,10 +20,6 @@ #define NO_FURTHER_WRITE_ACTION -1 -#ifndef phys_to_page -#define phys_to_page(x) pfn_to_page((x) >> PAGE_SHIFT) -#endif - /** * efi_free_all_buff_pages - free all previous allocated buffer pages * @cap_info: pointer to current instance of capsule_info structure @@ -35,7 +31,7 @@ static void efi_free_all_buff_pages(struct capsule_info *cap_info) { while (cap_info->index > 0) - __free_page(phys_to_page(cap_info->pages[--cap_info->index])); + __free_page(cap_info->pages[--cap_info->index]); cap_info->index = NO_FURTHER_WRITE_ACTION; } @@ -71,6 +67,14 @@ int __efi_capsule_setup_info(struct capsule_info *cap_info) cap_info->pages = temp_page; + temp_page = krealloc(cap_info->phys, + pages_needed * sizeof(phys_addr_t *), + GFP_KERNEL | __GFP_ZERO); + if (!temp_page) + return -ENOMEM; + + cap_info->phys = temp_page; + return 0; } @@ -105,9 +109,24 @@ int __weak efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, **/ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info) { + bool do_vunmap = false; int ret; - ret = efi_capsule_update(&cap_info->header, cap_info->pages); + /* + * cap_info->capsule may have been assigned already by a quirk + * handler, so only overwrite it if it is NULL + */ + if (!cap_info->capsule) { + cap_info->capsule = vmap(cap_info->pages, cap_info->index, + VM_MAP, PAGE_KERNEL); + if (!cap_info->capsule) + return -ENOMEM; + do_vunmap = true; + } + + ret = efi_capsule_update(cap_info->capsule, cap_info->phys); + if (do_vunmap) + vunmap(cap_info->capsule); if (ret) { pr_err("capsule update failed\n"); return ret; @@ -165,10 +184,12 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff, goto failed; } - cap_info->pages[cap_info->index++] = page_to_phys(page); + cap_info->pages[cap_info->index] = page; + cap_info->phys[cap_info->index] = page_to_phys(page); cap_info->page_bytes_remain = PAGE_SIZE; + cap_info->index++; } else { - page = phys_to_page(cap_info->pages[cap_info->index - 1]); + page = cap_info->pages[cap_info->index - 1]; } kbuff = kmap(page); @@ -252,6 +273,7 @@ static int efi_capsule_release(struct inode *inode, struct file *file) struct capsule_info *cap_info = file->private_data; kfree(cap_info->pages); + kfree(cap_info->phys); kfree(file->private_data); file->private_data = NULL; return 0; @@ -281,6 +303,13 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } + cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL); + if (!cap_info->phys) { + kfree(cap_info->pages); + kfree(cap_info); + return -ENOMEM; + } + file->private_data = cap_info; return 0; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index f70febf680c392b37217ce5e6f8c8c4301234869..557a47829d03f2b14b0c3b664e1044e7e2cb86bc 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -109,6 +109,8 @@ struct kobject *efi_kobj; /* * Let's not leave out systab information that snuck into * the efivars driver + * Note, do not add more fields in systab sysfs file as it breaks sysfs + * one value per file rule! */ static ssize_t systab_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -143,8 +145,7 @@ static ssize_t systab_show(struct kobject *kobj, return str - buf; } -static struct kobj_attribute efi_attr_systab = - __ATTR(systab, 0400, systab_show, NULL); +static struct kobj_attribute efi_attr_systab = __ATTR_RO_MODE(systab, 0400); #define EFI_FIELD(var) efi.var diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index bd7ed3c1148a7ccd5032e367e3af0ba66af18d20..c47e0c6ec00f858c0b9960f605974b4f2e4b1294 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -106,7 +106,7 @@ static const struct sysfs_ops esre_attr_ops = { }; /* Generic ESRT Entry ("ESRE") support. */ -static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf) +static ssize_t fw_class_show(struct esre_entry *entry, char *buf) { char *str = buf; @@ -117,18 +117,16 @@ static ssize_t esre_fw_class_show(struct esre_entry *entry, char *buf) return str - buf; } -static struct esre_attribute esre_fw_class = __ATTR(fw_class, 0400, - esre_fw_class_show, NULL); +static struct esre_attribute esre_fw_class = __ATTR_RO_MODE(fw_class, 0400); #define esre_attr_decl(name, size, fmt) \ -static ssize_t esre_##name##_show(struct esre_entry *entry, char *buf) \ +static ssize_t name##_show(struct esre_entry *entry, char *buf) \ { \ return sprintf(buf, fmt "\n", \ le##size##_to_cpu(entry->esre.esre1->name)); \ } \ \ -static struct esre_attribute esre_##name = __ATTR(name, 0400, \ - esre_##name##_show, NULL) +static struct esre_attribute esre_##name = __ATTR_RO_MODE(name, 0400) esre_attr_decl(fw_type, 32, "%u"); esre_attr_decl(fw_version, 32, "%u"); @@ -193,14 +191,13 @@ static int esre_create_sysfs_entry(void *esre, int entry_num) /* support for displaying ESRT fields at the top level */ #define esrt_attr_decl(name, size, fmt) \ -static ssize_t esrt_##name##_show(struct kobject *kobj, \ +static ssize_t name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, char *buf)\ { \ return sprintf(buf, fmt "\n", le##size##_to_cpu(esrt->name)); \ } \ \ -static struct kobj_attribute esrt_##name = __ATTR(name, 0400, \ - esrt_##name##_show, NULL) +static struct kobj_attribute esrt_##name = __ATTR_RO_MODE(name, 0400) esrt_attr_decl(fw_resource_count, 32, "%u"); esrt_attr_decl(fw_resource_count_max, 32, "%u"); @@ -431,7 +428,7 @@ static int __init esrt_sysfs_init(void) err_remove_esrt: kobject_put(esrt_kobj); err: - kfree(esrt); + memunmap(esrt); esrt = NULL; return error; } diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c index 8e64b77aeac95e43c0e0571694f42bbe6c8ba73f..f377609ff141bca733bf498babc25f9d215aefad 100644 --- a/drivers/firmware/efi/runtime-map.c +++ b/drivers/firmware/efi/runtime-map.c @@ -63,11 +63,11 @@ static ssize_t map_attr_show(struct kobject *kobj, struct attribute *attr, return map_attr->show(entry, buf); } -static struct map_attribute map_type_attr = __ATTR_RO(type); -static struct map_attribute map_phys_addr_attr = __ATTR_RO(phys_addr); -static struct map_attribute map_virt_addr_attr = __ATTR_RO(virt_addr); -static struct map_attribute map_num_pages_attr = __ATTR_RO(num_pages); -static struct map_attribute map_attribute_attr = __ATTR_RO(attribute); +static struct map_attribute map_type_attr = __ATTR_RO_MODE(type, 0400); +static struct map_attribute map_phys_addr_attr = __ATTR_RO_MODE(phys_addr, 0400); +static struct map_attribute map_virt_addr_attr = __ATTR_RO_MODE(virt_addr, 0400); +static struct map_attribute map_num_pages_attr = __ATTR_RO_MODE(num_pages, 0400); +static struct map_attribute map_attribute_attr = __ATTR_RO_MODE(attribute, 0400); /* * These are default attributes that are added for every memmap entry. diff --git a/drivers/firmware/google/vpd.c b/drivers/firmware/google/vpd.c index 35e553b3b19051b45985991b9b66dc19366fc41e..e4b40f2b46274a0871d1cb881732e8358a324112 100644 --- a/drivers/firmware/google/vpd.c +++ b/drivers/firmware/google/vpd.c @@ -295,38 +295,60 @@ static int vpd_probe(struct platform_device *pdev) if (ret) return ret; - return vpd_sections_init(entry.cbmem_addr); + vpd_kobj = kobject_create_and_add("vpd", firmware_kobj); + if (!vpd_kobj) + return -ENOMEM; + + ret = vpd_sections_init(entry.cbmem_addr); + if (ret) { + kobject_put(vpd_kobj); + return ret; + } + + return 0; +} + +static int vpd_remove(struct platform_device *pdev) +{ + vpd_section_destroy(&ro_vpd); + vpd_section_destroy(&rw_vpd); + + kobject_put(vpd_kobj); + + return 0; } static struct platform_driver vpd_driver = { .probe = vpd_probe, + .remove = vpd_remove, .driver = { .name = "vpd", }, }; +static struct platform_device *vpd_pdev; + static int __init vpd_platform_init(void) { - struct platform_device *pdev; - - pdev = platform_device_register_simple("vpd", -1, NULL, 0); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); + int ret; - vpd_kobj = kobject_create_and_add("vpd", firmware_kobj); - if (!vpd_kobj) - return -ENOMEM; + ret = platform_driver_register(&vpd_driver); + if (ret) + return ret; - platform_driver_register(&vpd_driver); + vpd_pdev = platform_device_register_simple("vpd", -1, NULL, 0); + if (IS_ERR(vpd_pdev)) { + platform_driver_unregister(&vpd_driver); + return PTR_ERR(vpd_pdev); + } return 0; } static void __exit vpd_platform_exit(void) { - vpd_section_destroy(&ro_vpd); - vpd_section_destroy(&rw_vpd); - kobject_put(vpd_kobj); + platform_device_unregister(vpd_pdev); + platform_driver_unregister(&vpd_driver); } module_init(vpd_platform_init); diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 5cfe39f7a45f080f56f36eea6259ec4c1b1df8b6..deb483064f53c3e680d34b655360faac04853c3f 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -582,9 +582,10 @@ static int fw_cfg_sysfs_remove(struct platform_device *pdev) { pr_debug("fw_cfg: unloading.\n"); fw_cfg_sysfs_cache_cleanup(); + sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr); + fw_cfg_io_cleanup(); fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset); fw_cfg_kobj_cleanup(fw_cfg_sel_ko); - fw_cfg_io_cleanup(); return 0; } diff --git a/drivers/gpio/gpio-74x164.c b/drivers/gpio/gpio-74x164.c index 6b535ec858cc35330baa773782dd51bcccd4da24..15a1f4b348c41b2915755dba173d71704a862768 100644 --- a/drivers/gpio/gpio-74x164.c +++ b/drivers/gpio/gpio-74x164.c @@ -23,6 +23,7 @@ struct gen_74x164_chip { struct gpio_chip gpio_chip; struct mutex lock; + struct gpio_desc *gpiod_oe; u32 registers; /* * Since the registers are chained, every byte sent will make @@ -31,8 +32,7 @@ struct gen_74x164_chip { * register at the end of the transfer. So, to have a logical * numbering, store the bytes in reverse order. */ - u8 buffer[0]; - struct gpio_desc *gpiod_oe; + u8 buffer[]; }; static int __gen_74x164_write_config(struct gen_74x164_chip *chip) diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index dfcf56ee3c6181fc64d0a074e2f5659fec35a6d8..76861a00bb92c4b449f346433f282da8e6cb82c9 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -522,6 +522,7 @@ static struct of_device_id const bcm_kona_gpio_of_match[] = { * category than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; +static struct lock_class_key gpio_request_class; static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) @@ -531,7 +532,7 @@ static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq, ret = irq_set_chip_data(irq, d->host_data); if (ret < 0) return ret; - irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class, &gpio_request_class); irq_set_chip_and_handler(irq, &bcm_gpio_irq_chip, handle_simple_irq); irq_set_noprobe(irq); diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index 545d43a587b7ef1308dc827ffbea2c2dd733b478..bb4f8cf18bd9f6c7e47328aaf2ec1cdd3dfc3d6b 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -327,6 +327,7 @@ static struct brcmstb_gpio_bank *brcmstb_gpio_hwirq_to_bank( * category than their parents, so it won't report false recursion. */ static struct lock_class_key brcmstb_gpio_irq_lock_class; +static struct lock_class_key brcmstb_gpio_irq_request_class; static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, @@ -346,7 +347,8 @@ static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq, ret = irq_set_chip_data(irq, &bank->gc); if (ret < 0) return ret; - irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class); + irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class, + &brcmstb_gpio_irq_request_class); irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq); irq_set_noprobe(irq); return 0; diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index f75d8443ecaff631d07e8b474e2bdf769357adb0..e4b3d7db68c95a2d87b9766e54f688fe2dd13f36 100644 --- a/drivers/gpio/gpio-davinci.c +++ b/drivers/gpio/gpio-davinci.c @@ -383,7 +383,7 @@ static int gpio_irq_type_unbanked(struct irq_data *data, unsigned trigger) u32 mask; d = (struct davinci_gpio_controller *)irq_data_get_irq_handler_data(data); - g = (struct davinci_gpio_regs __iomem *)d->regs; + g = (struct davinci_gpio_regs __iomem *)d->regs[0]; mask = __gpio_mask(data->irq - d->base_irq); if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index babb7bd2ba59b60aa723fa10606c186862a12af3..a0a5f9730aa77b92ea5bc520c22f64b386edb583 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -947,7 +947,7 @@ static const struct of_device_id pca953x_dt_ids[] = { { .compatible = "ti,tca6416", .data = OF_953X(16, PCA_INT), }, { .compatible = "ti,tca6424", .data = OF_953X(24, PCA_INT), }, - { .compatible = "onsemi,pca9654", .data = OF_953X( 8, PCA_INT), }, + { .compatible = "onnn,pca9654", .data = OF_953X( 8, PCA_INT), }, { .compatible = "exar,xra1202", .data = OF_953X( 8, 0), }, { } diff --git a/drivers/gpio/gpio-reg.c b/drivers/gpio/gpio-reg.c index 23e771dba4c17ab7a497feb37c108f15fe117c79..e85903eddc68ecd5930df5ded45efbc398aa57ca 100644 --- a/drivers/gpio/gpio-reg.c +++ b/drivers/gpio/gpio-reg.c @@ -103,8 +103,8 @@ static int gpio_reg_to_irq(struct gpio_chip *gc, unsigned offset) struct gpio_reg *r = to_gpio_reg(gc); int irq = r->irqs[offset]; - if (irq >= 0 && r->irq.domain) - irq = irq_find_mapping(r->irq.domain, irq); + if (irq >= 0 && r->irqdomain) + irq = irq_find_mapping(r->irqdomain, irq); return irq; } diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 8db47f671708752dbaae420478935f1613847dd3..02fa8fe2292a13608e332d6247e9c0b55870ae98 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -565,6 +565,7 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = { * than their parents, so it won't report false recursion. */ static struct lock_class_key gpio_lock_class; +static struct lock_class_key gpio_request_class; static int tegra_gpio_probe(struct platform_device *pdev) { @@ -670,7 +671,8 @@ static int tegra_gpio_probe(struct platform_device *pdev) bank = &tgi->bank_info[GPIO_BANK(gpio)]; - irq_set_lockdep_class(irq, &gpio_lock_class); + irq_set_lockdep_class(irq, &gpio_lock_class, + &gpio_request_class); irq_set_chip_data(irq, bank); irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq); } diff --git a/drivers/gpio/gpio-xgene-sb.c b/drivers/gpio/gpio-xgene-sb.c index 2313af82fad3d4bb3cf7027bec9f25d9e6b5887c..acd59113e08b9cda0fce6930981520a481ec7203 100644 --- a/drivers/gpio/gpio-xgene-sb.c +++ b/drivers/gpio/gpio-xgene-sb.c @@ -139,7 +139,7 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio) static int xgene_gpio_sb_domain_activate(struct irq_domain *d, struct irq_data *irq_data, - bool early) + bool reserve) { struct xgene_gpio_sb *priv = d->host_data; u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq); diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index eb4528c87c0b3977420a2108c7feaaf9b2a95869..d6f3d9ee1350e422e3f373427d55d6ce8b6be565 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip) } if (!chip->names) - devprop_gpiochip_set_names(chip); + devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent)); acpi_gpiochip_request_regions(acpi_gpio); acpi_gpiochip_scan_gpios(acpi_gpio); diff --git a/drivers/gpio/gpiolib-devprop.c b/drivers/gpio/gpiolib-devprop.c index 27f383bda7d9621322a3340d9def92354d21d7a3..f748aa3e77f72000b357718d5c955abba38c5c07 100644 --- a/drivers/gpio/gpiolib-devprop.c +++ b/drivers/gpio/gpiolib-devprop.c @@ -19,30 +19,27 @@ /** * devprop_gpiochip_set_names - Set GPIO line names using device properties * @chip: GPIO chip whose lines should be named, if possible + * @fwnode: Property Node containing the gpio-line-names property * * Looks for device property "gpio-line-names" and if it exists assigns * GPIO line names for the chip. The memory allocated for the assigned * names belong to the underlying firmware node and should not be released * by the caller. */ -void devprop_gpiochip_set_names(struct gpio_chip *chip) +void devprop_gpiochip_set_names(struct gpio_chip *chip, + const struct fwnode_handle *fwnode) { struct gpio_device *gdev = chip->gpiodev; const char **names; int ret, i; - if (!chip->parent) { - dev_warn(&gdev->dev, "GPIO chip parent is NULL\n"); - return; - } - - ret = device_property_read_string_array(chip->parent, "gpio-line-names", + ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", NULL, 0); if (ret < 0) return; if (ret != gdev->ngpio) { - dev_warn(chip->parent, + dev_warn(&gdev->dev, "names %d do not match number of GPIOs %d\n", ret, gdev->ngpio); return; @@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip) if (!names) return; - ret = device_property_read_string_array(chip->parent, "gpio-line-names", + ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", names, gdev->ngpio); if (ret < 0) { - dev_warn(chip->parent, "failed to read GPIO line names\n"); + dev_warn(&gdev->dev, "failed to read GPIO line names\n"); kfree(names); return; } diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index e0d59e61b52fa6aa53e7830ae1ab1c59ceec2453..72a0695d2ac3a3d3217462ff4ea85147921f5dcd 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip) /* If the chip defines names itself, these take precedence */ if (!chip->names) - devprop_gpiochip_set_names(chip); + devprop_gpiochip_set_names(chip, + of_fwnode_handle(chip->of_node)); of_node_get(chip->of_node); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index aad84a6306c4e5ddbc3364d58add85fa1b1e2583..14532d9576e4239ff60a193bec13a1a819c338b8 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -73,7 +73,8 @@ LIST_HEAD(gpio_devices); static void gpiochip_free_hogs(struct gpio_chip *chip); static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip); static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip); static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip); @@ -1100,7 +1101,8 @@ static void gpiochip_setup_devs(void) } int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, - struct lock_class_key *key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { unsigned long flags; int status = 0; @@ -1246,7 +1248,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, if (status) goto err_remove_from_list; - status = gpiochip_add_irqchip(chip, key); + status = gpiochip_add_irqchip(chip, lock_key, request_key); if (status) goto err_remove_chip; @@ -1632,7 +1634,7 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, * This lock class tells lockdep that GPIO irqs are in a different * category than their parents, so it won't report false recursion. */ - irq_set_lockdep_class(irq, chip->irq.lock_key); + irq_set_lockdep_class(irq, chip->irq.lock_key, chip->irq.request_key); irq_set_chip_and_handler(irq, chip->irq.chip, chip->irq.handler); /* Chips that use nested thread handlers have them marked */ if (chip->irq.threaded) @@ -1712,10 +1714,12 @@ static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset) /** * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip * @gpiochip: the GPIO chip to add the IRQ chip to - * @lock_key: lockdep class + * @lock_key: lockdep class for IRQ lock + * @request_key: lockdep class for IRQ request */ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *lock_key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { struct irq_chip *irqchip = gpiochip->irq.chip; const struct irq_domain_ops *ops; @@ -1753,6 +1757,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip, gpiochip->to_irq = gpiochip_to_irq; gpiochip->irq.default_type = type; gpiochip->irq.lock_key = lock_key; + gpiochip->irq.request_key = request_key; if (gpiochip->irq.domain_ops) ops = gpiochip->irq.domain_ops; @@ -1850,7 +1855,8 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE * to have the core avoid setting up any default type in the hardware. * @threaded: whether this irqchip uses a nested thread handler - * @lock_key: lockdep class + * @lock_key: lockdep class for IRQ lock + * @request_key: lockdep class for IRQ request * * This function closely associates a certain irqchip with a certain * gpiochip, providing an irq domain to translate the local IRQs to @@ -1872,7 +1878,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type, bool threaded, - struct lock_class_key *lock_key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { struct device_node *of_node; @@ -1913,6 +1920,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, gpiochip->irq.default_type = type; gpiochip->to_irq = gpiochip_to_irq; gpiochip->irq.lock_key = lock_key; + gpiochip->irq.request_key = request_key; gpiochip->irq.domain = irq_domain_add_simple(of_node, gpiochip->ngpio, first_irq, &gpiochip_domain_ops, gpiochip); @@ -1940,7 +1948,8 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key); #else /* CONFIG_GPIOLIB_IRQCHIP */ static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip, - struct lock_class_key *key) + struct lock_class_key *lock_key, + struct lock_class_key *request_key) { return 0; } @@ -2883,6 +2892,27 @@ void gpiod_set_raw_value(struct gpio_desc *desc, int value) } EXPORT_SYMBOL_GPL(gpiod_set_raw_value); +/** + * gpiod_set_value_nocheck() - set a GPIO line value without checking + * @desc: the descriptor to set the value on + * @value: value to set + * + * This sets the value of a GPIO line backing a descriptor, applying + * different semantic quirks like active low and open drain/source + * handling. + */ +static void gpiod_set_value_nocheck(struct gpio_desc *desc, int value) +{ + if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) + value = !value; + if (test_bit(FLAG_OPEN_DRAIN, &desc->flags)) + gpio_set_open_drain_value_commit(desc, value); + else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) + gpio_set_open_source_value_commit(desc, value); + else + gpiod_set_raw_value_commit(desc, value); +} + /** * gpiod_set_value() - assign a gpio's value * @desc: gpio whose value will be assigned @@ -2897,16 +2927,8 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_value); void gpiod_set_value(struct gpio_desc *desc, int value) { VALIDATE_DESC_VOID(desc); - /* Should be using gpiod_set_value_cansleep() */ WARN_ON(desc->gdev->chip->can_sleep); - if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) - value = !value; - if (test_bit(FLAG_OPEN_DRAIN, &desc->flags)) - gpio_set_open_drain_value_commit(desc, value); - else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) - gpio_set_open_source_value_commit(desc, value); - else - gpiod_set_raw_value_commit(desc, value); + gpiod_set_value_nocheck(desc, value); } EXPORT_SYMBOL_GPL(gpiod_set_value); @@ -3234,9 +3256,7 @@ void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) { might_sleep_if(extra_checks); VALIDATE_DESC_VOID(desc); - if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) - value = !value; - gpiod_set_raw_value_commit(desc, value); + gpiod_set_value_nocheck(desc, value); } EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index af48322839c3d6004ee16a32591f1f434d364fc8..6c44d165213910a259ee94c85731ab4d1ca68431 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -228,7 +228,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc) return desc - &desc->gdev->descs[0]; } -void devprop_gpiochip_set_names(struct gpio_chip *chip); +void devprop_gpiochip_set_names(struct gpio_chip *chip, + const struct fwnode_handle *fwnode); /* With descriptor prefix */ diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index d853989848d68a459548e7e36fc5a92973035e8f..deeefa7a1773b7b159e531f9eb6d365ebe77bbb8 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -27,10 +27,6 @@ config DRM_MIPI_DSI bool depends on DRM -# Separate option because drm_panel_orientation_quirks.c is shared with fbdev -config DRM_PANEL_ORIENTATION_QUIRKS - tristate - config DRM_DP_AUX_CHARDEV bool "DRM DP AUX Interface" depends on DRM @@ -154,6 +150,10 @@ config DRM_VM bool depends on DRM && MMU +config DRM_SCHED + tristate + depends on DRM + source "drivers/gpu/drm/i2c/Kconfig" source "drivers/gpu/drm/arm/Kconfig" @@ -183,6 +183,7 @@ config DRM_AMDGPU depends on DRM && PCI && MMU select FW_LOADER select DRM_KMS_HELPER + select DRM_SCHED select DRM_TTM select POWER_SUPPLY select HWMON @@ -367,6 +368,10 @@ config DRM_SAVAGE endif # DRM_LEGACY +# Separate option because drm_panel_orientation_quirks.c is shared with fbdev +config DRM_PANEL_ORIENTATION_QUIRKS + tristate + config DRM_LIB_RANDOM bool default n diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index e5bf68b9c1714583f27606b12d3a1c5fa51edb81..50093ff4479b4c41f8ca2d63d83b9423bc535ada 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -50,6 +50,7 @@ obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o obj-$(CONFIG_DRM_ARM) += arm/ obj-$(CONFIG_DRM_TTM) += ttm/ +obj-$(CONFIG_DRM_SCHED) += scheduler/ obj-$(CONFIG_DRM_TDFX) += tdfx/ obj-$(CONFIG_DRM_R128) += r128/ obj-y += amd/lib/ diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile index 8a08e81ee90d579774ca96bc70853093ba623f09..d4176a3fb7062537e81010fbb5ea42be15b55a29 100644 --- a/drivers/gpu/drm/amd/acp/Makefile +++ b/drivers/gpu/drm/amd/acp/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the ACP, which is a sub-component # of AMDSOC/AMDGPU drm driver. # It provides the HW control for ACP related functionalities. diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 78d609123420455a1d8811eaeb6e91ebaa26e626..d6e5b727385304e47d0cb3e6e418dcbce8f26a89 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -1,4 +1,24 @@ -# SPDX-License-Identifier: GPL-2.0 +# +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# # # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. @@ -32,7 +52,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o + amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o \ + amdgpu_ids.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ @@ -42,7 +63,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o amdgpu-y += \ - vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o + vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o # add GMC block amdgpu-y += \ @@ -115,10 +136,7 @@ amdgpu-y += \ amdgpu-y += amdgpu_cgs.o # GPU scheduler -amdgpu-y += \ - ../scheduler/gpu_scheduler.o \ - ../scheduler/sched_fence.o \ - amdgpu_job.o +amdgpu-y += amdgpu_job.o # ACP componet ifneq ($(CONFIG_DRM_AMD_ACP),) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5e2958a79928d0b4b4a0571cb50bca734f48cb28..d5a2eefd6c3e9c634597dba673a6ee25a60c830d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -45,6 +45,7 @@ #include #include #include +#include #include #include "dm_pp_interface.h" @@ -68,10 +69,9 @@ #include "amdgpu_vcn.h" #include "amdgpu_mn.h" #include "amdgpu_dm.h" -#include "gpu_scheduler.h" #include "amdgpu_virt.h" #include "amdgpu_gart.h" - +#include "amdgpu_debugfs.h" /* * Modules parameters. @@ -126,6 +126,7 @@ extern int amdgpu_param_buf_per_se; extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; +extern int amdgpu_gpu_recovery; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -223,17 +224,18 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_LAST }; -int amdgpu_set_clockgating_state(struct amdgpu_device *adev, - enum amd_ip_block_type block_type, - enum amd_clockgating_state state); -int amdgpu_set_powergating_state(struct amdgpu_device *adev, - enum amd_ip_block_type block_type, - enum amd_powergating_state state); -void amdgpu_get_clockgating_state(struct amdgpu_device *adev, u32 *flags); -int amdgpu_wait_for_idle(struct amdgpu_device *adev, - enum amd_ip_block_type block_type); -bool amdgpu_is_idle(struct amdgpu_device *adev, - enum amd_ip_block_type block_type); +int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, + enum amd_ip_block_type block_type, + enum amd_clockgating_state state); +int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, + enum amd_ip_block_type block_type, + enum amd_powergating_state state); +void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags); +int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, + enum amd_ip_block_type block_type); +bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, + enum amd_ip_block_type block_type); #define AMDGPU_MAX_IP_NUM 16 @@ -258,15 +260,16 @@ struct amdgpu_ip_block { const struct amdgpu_ip_block_version *version; }; -int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev, - enum amd_ip_block_type type, - u32 major, u32 minor); +int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, + enum amd_ip_block_type type, + u32 major, u32 minor); -struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev, - enum amd_ip_block_type type); +struct amdgpu_ip_block * +amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, + enum amd_ip_block_type type); -int amdgpu_ip_block_add(struct amdgpu_device *adev, - const struct amdgpu_ip_block_version *ip_block_version); +int amdgpu_device_ip_block_add(struct amdgpu_device *adev, + const struct amdgpu_ip_block_version *ip_block_version); /* provided by hw blocks that can move/clear data. e.g., gfx or sdma */ struct amdgpu_buffer_funcs { @@ -346,8 +349,9 @@ struct amdgpu_gart_funcs { uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, uint32_t flags); /* get the pde for a given mc addr */ - u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr); - uint32_t (*get_invalidate_req)(unsigned int vm_id); + void (*get_vm_pde)(struct amdgpu_device *adev, int level, + u64 *dst, u64 *flags); + uint32_t (*get_invalidate_req)(unsigned int vmid); }; /* provided by the ih block */ @@ -373,9 +377,6 @@ struct amdgpu_dummy_page { struct page *page; dma_addr_t addr; }; -int amdgpu_dummy_page_init(struct amdgpu_device *adev); -void amdgpu_dummy_page_fini(struct amdgpu_device *adev); - /* * Clocks @@ -423,7 +424,6 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); -int amdgpu_gem_debugfs_init(struct amdgpu_device *adev); /* sub-allocation manager, it has to be protected by another lock. * By conception this is an helper for other part of the driver @@ -540,6 +540,7 @@ struct amdgpu_mc { u64 private_aperture_end; /* protects concurrent invalidation */ spinlock_t invalidate_lock; + bool translate_further; }; /* @@ -650,12 +651,6 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_INVALID = 0xFFFF } AMDGPU_DOORBELL64_ASSIGNMENT; - -void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, - phys_addr_t *aperture_base, - size_t *aperture_size, - size_t *start_offset); - /* * IRQS. */ @@ -689,7 +684,7 @@ struct amdgpu_ib { uint32_t flags; }; -extern const struct amd_sched_backend_ops amdgpu_sched_ops; +extern const struct drm_sched_backend_ops amdgpu_sched_ops; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_job **job, struct amdgpu_vm *vm); @@ -699,7 +694,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, void amdgpu_job_free_resources(struct amdgpu_job *job); void amdgpu_job_free(struct amdgpu_job *job); int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, - struct amd_sched_entity *entity, void *owner, + struct drm_sched_entity *entity, void *owner, struct dma_fence **f); /* @@ -732,7 +727,7 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, struct amdgpu_ctx_ring { uint64_t sequence; struct dma_fence **fences; - struct amd_sched_entity entity; + struct drm_sched_entity entity; }; struct amdgpu_ctx { @@ -746,8 +741,8 @@ struct amdgpu_ctx { struct dma_fence **fences; struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; bool preamble_presented; - enum amd_sched_priority init_priority; - enum amd_sched_priority override_priority; + enum drm_sched_priority init_priority; + enum drm_sched_priority override_priority; struct mutex lock; atomic_t guilty; }; @@ -767,7 +762,7 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, - enum amd_sched_priority priority); + enum drm_sched_priority priority); int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); @@ -964,6 +959,7 @@ struct amdgpu_gfx_config { }; struct amdgpu_cu_info { + uint32_t simd_per_cu; uint32_t max_waves_per_simd; uint32_t wave_front_size; uint32_t max_scratch_slots_per_cu; @@ -1116,7 +1112,7 @@ struct amdgpu_cs_parser { #define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */ struct amdgpu_job { - struct amd_sched_job base; + struct drm_sched_job base; struct amdgpu_device *adev; struct amdgpu_vm *vm; struct amdgpu_ring *ring; @@ -1129,7 +1125,7 @@ struct amdgpu_job { void *owner; uint64_t fence_ctx; /* the fence_context this job uses */ bool vm_needs_flush; - unsigned vm_id; + unsigned vmid; uint64_t vm_pd_addr; uint32_t gds_base, gds_size; uint32_t gws_base, gws_size; @@ -1170,10 +1166,10 @@ struct amdgpu_wb { unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)]; }; -int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb); -void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb); +int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb); +void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb); -void amdgpu_get_pcie_info(struct amdgpu_device *adev); +void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); /* * SDMA @@ -1238,24 +1234,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number); */ void amdgpu_test_moves(struct amdgpu_device *adev); -/* - * Debugfs - */ -struct amdgpu_debugfs { - const struct drm_info_list *files; - unsigned num_files; -}; - -int amdgpu_debugfs_add_files(struct amdgpu_device *adev, - const struct drm_info_list *files, - unsigned nfiles); -int amdgpu_debugfs_fence_init(struct amdgpu_device *adev); - -#if defined(CONFIG_DEBUG_FS) -int amdgpu_debugfs_init(struct drm_minor *minor); -#endif - -int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); /* * amdgpu smumgr functions @@ -1410,9 +1388,6 @@ struct amdgpu_fw_vram_usage { void *va; }; -int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev); -void amdgpu_fw_reserve_vram_fini(struct amdgpu_device *adev); - /* * CGS */ @@ -1428,6 +1403,80 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); + +/* + * amdgpu nbio functions + * + */ +struct nbio_hdp_flush_reg { + u32 ref_and_mask_cp0; + u32 ref_and_mask_cp1; + u32 ref_and_mask_cp2; + u32 ref_and_mask_cp3; + u32 ref_and_mask_cp4; + u32 ref_and_mask_cp5; + u32 ref_and_mask_cp6; + u32 ref_and_mask_cp7; + u32 ref_and_mask_cp8; + u32 ref_and_mask_cp9; + u32 ref_and_mask_sdma0; + u32 ref_and_mask_sdma1; +}; + +struct amdgpu_nbio_funcs { + const struct nbio_hdp_flush_reg *hdp_flush_reg; + u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev); + u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); + u32 (*get_rev_id)(struct amdgpu_device *adev); + void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); + void (*hdp_flush)(struct amdgpu_device *adev); + u32 (*get_memsize)(struct amdgpu_device *adev); + void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index); + void (*enable_doorbell_aperture)(struct amdgpu_device *adev, + bool enable); + void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, + bool enable); + void (*ih_doorbell_range)(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index); + void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, + bool enable); + void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, + bool enable); + void (*get_clockgating_state)(struct amdgpu_device *adev, + u32 *flags); + void (*ih_control)(struct amdgpu_device *adev); + void (*init_registers)(struct amdgpu_device *adev); + void (*detect_hw_virt)(struct amdgpu_device *adev); +}; + + +/* Define the HW IP blocks will be used in driver , add more if necessary */ +enum amd_hw_ip_block_type { + GC_HWIP = 1, + HDP_HWIP, + SDMA0_HWIP, + SDMA1_HWIP, + MMHUB_HWIP, + ATHUB_HWIP, + NBIO_HWIP, + MP0_HWIP, + UVD_HWIP, + VCN_HWIP = UVD_HWIP, + VCE_HWIP, + DF_HWIP, + DCE_HWIP, + OSSSYS_HWIP, + SMUIO_HWIP, + PWR_HWIP, + NBIF_HWIP, + MAX_HWIP +}; + +#define HWIP_MAX_INSTANCE 6 + struct amd_powerplay { struct cgs_device *cgs_device; void *pp_handle; @@ -1620,6 +1669,11 @@ struct amdgpu_device { /* amdkfd interface */ struct kfd_dev *kfd; + /* soc15 register offset based on ip, instance and segment */ + uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; + + const struct amdgpu_nbio_funcs *nbio_funcs; + /* delayed work_func for deferring clockgating during resume */ struct delayed_work late_init_work; @@ -1785,7 +1839,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) -#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr)) +#define amdgpu_gart_get_vm_pde(adev, level, dst, flags) (adev)->gart.gart_funcs->get_vm_pde((adev), (level), (dst), (flags)) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) @@ -1796,7 +1850,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) -#define amdgpu_ring_emit_ib(r, ib, vm_id, c) (r)->funcs->emit_ib((r), (ib), (vm_id), (c)) +#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c)) #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) @@ -1835,23 +1889,25 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) /* Common functions */ -int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job); -bool amdgpu_need_backup(struct amdgpu_device *adev); -void amdgpu_pci_config_reset(struct amdgpu_device *adev); -bool amdgpu_need_post(struct amdgpu_device *adev); +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job* job, bool force); +void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); +bool amdgpu_device_need_post(struct amdgpu_device *adev); void amdgpu_update_display_priority(struct amdgpu_device *adev); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, u64 num_vis_bytes); void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); -void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base); -void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc); +void amdgpu_device_vram_location(struct amdgpu_device *adev, + struct amdgpu_mc *mc, u64 base); +void amdgpu_device_gart_location(struct amdgpu_device *adev, + struct amdgpu_mc *mc); int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev); void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size); int amdgpu_ttm_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); -void amdgpu_program_register_sequence(struct amdgpu_device *adev, +void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 *registers, const u32 array_size); @@ -1885,7 +1941,7 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev); int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv); void amdgpu_driver_postclose_kms(struct drm_device *dev, struct drm_file *file_priv); -int amdgpu_suspend(struct amdgpu_device *adev); +int amdgpu_device_ip_suspend(struct amdgpu_device *adev); int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index c04f44a90392639e7ec3ae51da93a694ead70fee..a29362f9ef415d48a6a00ea94d0a6a8fa2b1d7b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -277,7 +277,7 @@ static int acp_hw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; const struct amdgpu_ip_block *ip_block = - amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP); + amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP); if (!ip_block) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index c70cda04dbfb921a026313924b9f98c4e83e6904..1d605e1c1d66eb926e3ff102248cd79ccad3838c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -93,6 +93,39 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) adev->pdev, kfd2kgd); } +/** + * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to + * setup amdkfd + * + * @adev: amdgpu_device pointer + * @aperture_base: output returning doorbell aperture base physical address + * @aperture_size: output returning doorbell aperture size in bytes + * @start_offset: output returning # of doorbell bytes reserved for amdgpu. + * + * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up, + * takes doorbells required for its own rings and reports the setup to amdkfd. + * amdgpu reserved doorbells are at the start of the doorbell aperture. + */ +static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, + phys_addr_t *aperture_base, + size_t *aperture_size, + size_t *start_offset) +{ + /* + * The first num_doorbells are used by amdgpu. + * amdkfd takes whatever's left in the aperture. + */ + if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) { + *aperture_base = adev->doorbell.base; + *aperture_size = adev->doorbell.size; + *start_offset = adev->doorbell.num_doorbells * sizeof(u32); + } else { + *aperture_base = 0; + *aperture_size = 0; + *start_offset = 0; + } +} + void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; @@ -242,14 +275,34 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) kfree(mem); } -uint64_t get_vmem_size(struct kgd_dev *kgd) +void get_local_mem_info(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info) { - struct amdgpu_device *adev = - (struct amdgpu_device *)kgd; + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : + ~((1ULL << 32) - 1); + resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size; + + memset(mem_info, 0, sizeof(*mem_info)); + if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) { + mem_info->local_mem_size_public = adev->mc.visible_vram_size; + mem_info->local_mem_size_private = adev->mc.real_vram_size - + adev->mc.visible_vram_size; + } else { + mem_info->local_mem_size_public = 0; + mem_info->local_mem_size_private = adev->mc.real_vram_size; + } + mem_info->vram_width = adev->mc.vram_width; - BUG_ON(kgd == NULL); + pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n", + &adev->mc.aper_base, &aper_limit, + mem_info->local_mem_size_public, + mem_info->local_mem_size_private); - return adev->mc.real_vram_size; + if (amdgpu_sriov_vf(adev)) + mem_info->mem_clk_max = adev->clock.default_mclk / 100; + else + mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; } uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) @@ -265,6 +318,39 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - /* The sclk is in quantas of 10kHz */ - return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; + /* the sclk is in quantas of 10kHz */ + if (amdgpu_sriov_vf(adev)) + return adev->clock.default_sclk / 100; + + return amdgpu_dpm_get_sclk(adev, false) / 100; +} + +void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_cu_info acu_info = adev->gfx.cu_info; + + memset(cu_info, 0, sizeof(*cu_info)); + if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap)) + return; + + cu_info->cu_active_number = acu_info.number; + cu_info->cu_ao_mask = acu_info.ao_cu_mask; + memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], + sizeof(acu_info.bitmap)); + cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; + cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; + cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; + cu_info->simd_per_cu = acu_info.simd_per_cu; + cu_info->max_waves_per_simd = acu_info.max_waves_per_simd; + cu_info->wave_front_size = acu_info.wave_front_size; + cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu; + cu_info->lds_size = acu_info.lds_size; +} + +uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 8d689ab7e4291fad585448c6e679aa32556facff..2a519f9062eeaf3357c378f90b4c0ef8c0971fd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -56,10 +56,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr); void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); -uint64_t get_vmem_size(struct kgd_dev *kgd); +void get_local_mem_info(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info); uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); +void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); +uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); #define read_user_wptr(mmptr, wptr, dst) \ ({ \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 1e3e9be7d77ecf29883cf0ec5d5874f0cb67bd64..a9e6aea0e5f8fd1b68599786827b130ad8befa00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -105,7 +105,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); @@ -166,17 +173,19 @@ static int get_tile_config(struct kgd_dev *kgd, static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, .free_gtt_mem = free_gtt_mem, - .get_vmem_size = get_vmem_size, + .get_local_mem_info = get_local_mem_info, .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, - .alloc_pasid = amdgpu_vm_alloc_pasid, - .free_pasid = amdgpu_vm_free_pasid, + .alloc_pasid = amdgpu_pasid_alloc, + .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, .hqd_is_occupied = kgd_hqd_is_occupied, .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, @@ -191,6 +200,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, + .get_cu_info = get_cu_info, + .get_vram_usage = amdgpu_amdkfd_get_vram_usage }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -375,7 +386,44 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t i = 0, reg; +#define HQD_N_REGS (35+4) +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(kgd, pipe_id, queue_id); + + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0); + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1); + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2); + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3); + + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) + DUMP_REG(reg); + + release_queue(kgd); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct cik_sdma_rlc_registers *m; @@ -410,10 +458,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); } - WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, - m->sdma_rlc_doorbell); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); + data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr); + + if (read_user_wptr(mm, wptr, data)) + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); + else + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdma_rlc_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, m->sdma_rlc_virtual_addr); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base); @@ -423,8 +478,37 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) m->sdma_rlc_rb_rptr_addr_lo); WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdma_rlc_rb_rptr_addr_hi); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, - m->sdma_rlc_rb_cntl); + + data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET + + queue_id * KFD_CIK_SDMA_QUEUE_OFFSET; + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+4) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_offset + reg); + for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK; + reg++) + DUMP_REG(sdma_offset + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; return 0; } @@ -575,7 +659,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, struct cik_sdma_rlc_registers *m; uint32_t sdma_base_addr; uint32_t temp; - int timeout = utimeout; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); sdma_base_addr = get_sdma_base_addr(m); @@ -588,10 +672,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT) break; - if (timeout <= 0) + if (time_after(jiffies, end_jiffies)) return -ETIME; - msleep(20); - timeout -= 20; + usleep_range(500, 1000); } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); @@ -599,6 +682,8 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 056929b8ccd04e29403ecb401b335aa1ea8d754a..b127259d7d8548c0d46b64c4c61ef11c51bb734e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -45,7 +45,7 @@ enum hqd_dequeue_request_type { RESET_WAVES }; -struct cik_sdma_rlc_registers; +struct vi_sdma_mqd; /* * Register access functions @@ -64,7 +64,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); @@ -125,17 +132,19 @@ static int get_tile_config(struct kgd_dev *kgd, static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, .free_gtt_mem = free_gtt_mem, - .get_vmem_size = get_vmem_size, + .get_local_mem_info = get_local_mem_info, .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, - .alloc_pasid = amdgpu_vm_alloc_pasid, - .free_pasid = amdgpu_vm_free_pasid, + .alloc_pasid = amdgpu_pasid_alloc, + .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, .hqd_is_occupied = kgd_hqd_is_occupied, .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, @@ -152,6 +161,8 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, + .get_cu_info = get_cu_info, + .get_vram_usage = amdgpu_amdkfd_get_vram_usage }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) @@ -268,9 +279,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) return 0; } -static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m) +static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m) { - return 0; + uint32_t retval; + + retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET + + m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET; + pr_debug("kfd: sdma base address: 0x%x\n", retval); + + return retval; } static inline struct vi_mqd *get_mqd(void *mqd) @@ -278,9 +295,9 @@ static inline struct vi_mqd *get_mqd(void *mqd) return (struct vi_mqd *)mqd; } -static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) +static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) { - return (struct cik_sdma_rlc_registers *)mqd; + return (struct vi_sdma_mqd *)mqd; } static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, @@ -358,8 +375,138 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd) +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) { + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t i = 0, reg; +#define HQD_N_REGS (54+4) +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(kgd, pipe_id, queue_id); + + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0); + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1); + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2); + DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3); + + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++) + DUMP_REG(reg); + + release_queue(kgd); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct vi_sdma_mqd *m; + unsigned long end_jiffies; + uint32_t sdma_base_addr; + uint32_t data; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(m); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + if (m->sdma_engine_id) { + data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data); + } else { + data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data); + } + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + + if (read_user_wptr(mm, wptr, data)) + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data); + else + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR, + m->sdmax_rlcx_virtual_addr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET + + queue_id * KFD_VI_SDMA_QUEUE_OFFSET; + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+4+2+3+7) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_offset + reg); + for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK; + reg++) + DUMP_REG(sdma_offset + reg); + for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; + reg++) + DUMP_REG(sdma_offset + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG; + reg++) + DUMP_REG(sdma_offset + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL; + reg++) + DUMP_REG(sdma_offset + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + return 0; } @@ -388,7 +535,7 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct cik_sdma_rlc_registers *m; + struct vi_sdma_mqd *m; uint32_t sdma_base_addr; uint32_t sdma_rlc_rb_cntl; @@ -509,10 +656,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, unsigned int utimeout) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct cik_sdma_rlc_registers *m; + struct vi_sdma_mqd *m; uint32_t sdma_base_addr; uint32_t temp; - int timeout = utimeout; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; m = get_sdma_mqd(mqd); sdma_base_addr = get_sdma_base_addr(m); @@ -523,18 +670,19 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, while (true) { temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); - if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT) + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) break; - if (timeout <= 0) + if (time_after(jiffies, end_jiffies)) return -ETIME; - msleep(20); - timeout -= 20; + usleep_range(500, 1000); } WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0); - WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 39f4d0df1adaf2352f39ae9232a1108d3bb1d950..bf872f694f5090da9ebc1fc5b227bb45364ef7d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -27,6 +27,7 @@ #include #include "amdgpu.h" #include "amdgpu_atombios.h" +#include "amdgpu_atomfirmware.h" #include "amdgpu_i2c.h" #include "atom.h" @@ -1699,7 +1700,7 @@ void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock) WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch); } -void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev) +static void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev) { uint32_t bios_2_scratch, bios_6_scratch; @@ -1721,28 +1722,6 @@ void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev) WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch); } -void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) - adev->bios_scratch[i] = RREG32(adev->bios_scratch_reg_offset + i); -} - -void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev) -{ - int i; - - /* - * VBIOS will check ASIC_INIT_COMPLETE bit to decide if - * execute ASIC_Init posting via driver - */ - adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK; - - for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) - WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]); -} - void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev, bool hung) { @@ -1798,7 +1777,7 @@ void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) #endif } -int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev) +static int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev) { struct atom_context *ctx = adev->mode_info.atom_context; int index = GetIndexIntoMasterTable(DATA, VRAM_UsageByFirmware); @@ -1841,3 +1820,234 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev) ctx->scratch_size_bytes = usage_bytes; return 0; } + +/* ATOM accessor methods */ +/* + * ATOM is an interpreted byte code stored in tables in the vbios. The + * driver registers callbacks to access registers and the interpreter + * in the driver parses the tables and executes then to program specific + * actions (set display modes, asic init, etc.). See amdgpu_atombios.c, + * atombios.h, and atom.c + */ + +/** + * cail_pll_read - read PLL register + * + * @info: atom card_info pointer + * @reg: PLL register offset + * + * Provides a PLL register accessor for the atom interpreter (r4xx+). + * Returns the value of the PLL register. + */ +static uint32_t cail_pll_read(struct card_info *info, uint32_t reg) +{ + return 0; +} + +/** + * cail_pll_write - write PLL register + * + * @info: atom card_info pointer + * @reg: PLL register offset + * @val: value to write to the pll register + * + * Provides a PLL register accessor for the atom interpreter (r4xx+). + */ +static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val) +{ + +} + +/** + * cail_mc_read - read MC (Memory Controller) register + * + * @info: atom card_info pointer + * @reg: MC register offset + * + * Provides an MC register accessor for the atom interpreter (r4xx+). + * Returns the value of the MC register. + */ +static uint32_t cail_mc_read(struct card_info *info, uint32_t reg) +{ + return 0; +} + +/** + * cail_mc_write - write MC (Memory Controller) register + * + * @info: atom card_info pointer + * @reg: MC register offset + * @val: value to write to the pll register + * + * Provides a MC register accessor for the atom interpreter (r4xx+). + */ +static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val) +{ + +} + +/** + * cail_reg_write - write MMIO register + * + * @info: atom card_info pointer + * @reg: MMIO register offset + * @val: value to write to the pll register + * + * Provides a MMIO register accessor for the atom interpreter (r4xx+). + */ +static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val) +{ + struct amdgpu_device *adev = info->dev->dev_private; + + WREG32(reg, val); +} + +/** + * cail_reg_read - read MMIO register + * + * @info: atom card_info pointer + * @reg: MMIO register offset + * + * Provides an MMIO register accessor for the atom interpreter (r4xx+). + * Returns the value of the MMIO register. + */ +static uint32_t cail_reg_read(struct card_info *info, uint32_t reg) +{ + struct amdgpu_device *adev = info->dev->dev_private; + uint32_t r; + + r = RREG32(reg); + return r; +} + +/** + * cail_ioreg_write - write IO register + * + * @info: atom card_info pointer + * @reg: IO register offset + * @val: value to write to the pll register + * + * Provides a IO register accessor for the atom interpreter (r4xx+). + */ +static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val) +{ + struct amdgpu_device *adev = info->dev->dev_private; + + WREG32_IO(reg, val); +} + +/** + * cail_ioreg_read - read IO register + * + * @info: atom card_info pointer + * @reg: IO register offset + * + * Provides an IO register accessor for the atom interpreter (r4xx+). + * Returns the value of the IO register. + */ +static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg) +{ + struct amdgpu_device *adev = info->dev->dev_private; + uint32_t r; + + r = RREG32_IO(reg); + return r; +} + +static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + struct atom_context *ctx = adev->mode_info.atom_context; + + return snprintf(buf, PAGE_SIZE, "%s\n", ctx->vbios_version); +} + +static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version, + NULL); + +/** + * amdgpu_atombios_fini - free the driver info and callbacks for atombios + * + * @adev: amdgpu_device pointer + * + * Frees the driver info and register access callbacks for the ATOM + * interpreter (r4xx+). + * Called at driver shutdown. + */ +void amdgpu_atombios_fini(struct amdgpu_device *adev) +{ + if (adev->mode_info.atom_context) { + kfree(adev->mode_info.atom_context->scratch); + kfree(adev->mode_info.atom_context->iio); + } + kfree(adev->mode_info.atom_context); + adev->mode_info.atom_context = NULL; + kfree(adev->mode_info.atom_card_info); + adev->mode_info.atom_card_info = NULL; + device_remove_file(adev->dev, &dev_attr_vbios_version); +} + +/** + * amdgpu_atombios_init - init the driver info and callbacks for atombios + * + * @adev: amdgpu_device pointer + * + * Initializes the driver info and register access callbacks for the + * ATOM interpreter (r4xx+). + * Returns 0 on sucess, -ENOMEM on failure. + * Called at driver startup. + */ +int amdgpu_atombios_init(struct amdgpu_device *adev) +{ + struct card_info *atom_card_info = + kzalloc(sizeof(struct card_info), GFP_KERNEL); + int ret; + + if (!atom_card_info) + return -ENOMEM; + + adev->mode_info.atom_card_info = atom_card_info; + atom_card_info->dev = adev->ddev; + atom_card_info->reg_read = cail_reg_read; + atom_card_info->reg_write = cail_reg_write; + /* needed for iio ops */ + if (adev->rio_mem) { + atom_card_info->ioreg_read = cail_ioreg_read; + atom_card_info->ioreg_write = cail_ioreg_write; + } else { + DRM_DEBUG("PCI I/O BAR is not found. Using MMIO to access ATOM BIOS\n"); + atom_card_info->ioreg_read = cail_reg_read; + atom_card_info->ioreg_write = cail_reg_write; + } + atom_card_info->mc_read = cail_mc_read; + atom_card_info->mc_write = cail_mc_write; + atom_card_info->pll_read = cail_pll_read; + atom_card_info->pll_write = cail_pll_write; + + adev->mode_info.atom_context = amdgpu_atom_parse(atom_card_info, adev->bios); + if (!adev->mode_info.atom_context) { + amdgpu_atombios_fini(adev); + return -ENOMEM; + } + + mutex_init(&adev->mode_info.atom_context->mutex); + if (adev->is_atom_fw) { + amdgpu_atomfirmware_scratch_regs_init(adev); + amdgpu_atomfirmware_allocate_fb_scratch(adev); + } else { + amdgpu_atombios_scratch_regs_init(adev); + amdgpu_atombios_allocate_fb_scratch(adev); + } + + ret = device_create_file(adev->dev, &dev_attr_vbios_version); + if (ret) { + DRM_ERROR("Failed to create device file for VBIOS version\n"); + return ret; + } + + return 0; +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index b0d5d1d7fdba15d6674fdfbc744c90592cee8c8c..fd8f18074f7ad4b138f2b506979da7de472f8b9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -195,9 +195,6 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev, bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev); void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock); -void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev); -void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev); -void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev); void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev, bool hung); bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev); @@ -219,6 +216,7 @@ int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev, u8 voltage_type, u8 *svd_gpio_id, u8 *svc_gpio_id); -int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev); +void amdgpu_atombios_fini(struct amdgpu_device *adev); +int amdgpu_atombios_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index c13c51af0b681dbb1ee605b978a319ffaa4c7794..e2c3c5ec42d1557d58474441782bcfd788972a80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -14,6 +14,16 @@ #include "amd_acpi.h" +#define AMDGPU_PX_QUIRK_FORCE_ATPX (1 << 0) + +struct amdgpu_px_quirk { + u32 chip_vendor; + u32 chip_device; + u32 subsys_vendor; + u32 subsys_device; + u32 px_quirk_flags; +}; + struct amdgpu_atpx_functions { bool px_params; bool power_cntl; @@ -35,6 +45,7 @@ struct amdgpu_atpx { static struct amdgpu_atpx_priv { bool atpx_detected; bool bridge_pm_usable; + unsigned int quirks; /* handle for device - and atpx */ acpi_handle dhandle; acpi_handle other_handle; @@ -205,13 +216,19 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx) atpx->is_hybrid = false; if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) { - printk("ATPX Hybrid Graphics\n"); - /* - * Disable legacy PM methods only when pcie port PM is usable, - * otherwise the device might fail to power off or power on. - */ - atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable; - atpx->is_hybrid = true; + if (amdgpu_atpx_priv.quirks & AMDGPU_PX_QUIRK_FORCE_ATPX) { + printk("ATPX Hybrid Graphics, forcing to ATPX\n"); + atpx->functions.power_cntl = true; + atpx->is_hybrid = false; + } else { + printk("ATPX Hybrid Graphics\n"); + /* + * Disable legacy PM methods only when pcie port PM is usable, + * otherwise the device might fail to power off or power on. + */ + atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable; + atpx->is_hybrid = true; + } } atpx->dgpu_req_power_for_displays = false; @@ -547,6 +564,30 @@ static const struct vga_switcheroo_handler amdgpu_atpx_handler = { .get_client_id = amdgpu_atpx_get_client_id, }; +static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = { + /* HG _PR3 doesn't seem to work on this A+A weston board */ + { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX }, + { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, + { 0, 0, 0, 0, 0 }, +}; + +static void amdgpu_atpx_get_quirks(struct pci_dev *pdev) +{ + const struct amdgpu_px_quirk *p = amdgpu_px_quirk_list; + + /* Apply PX quirks */ + while (p && p->chip_device != 0) { + if (pdev->vendor == p->chip_vendor && + pdev->device == p->chip_device && + pdev->subsystem_vendor == p->subsys_vendor && + pdev->subsystem_device == p->subsys_device) { + amdgpu_atpx_priv.quirks |= p->px_quirk_flags; + break; + } + ++p; + } +} + /** * amdgpu_atpx_detect - detect whether we have PX * @@ -570,6 +611,7 @@ static bool amdgpu_atpx_detect(void) parent_pdev = pci_upstream_bridge(pdev); d3_supported |= parent_pdev && parent_pdev->bridge_d3; + amdgpu_atpx_get_quirks(pdev); } while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { @@ -579,6 +621,7 @@ static bool amdgpu_atpx_detect(void) parent_pdev = pci_upstream_bridge(pdev); d3_supported |= parent_pdev && parent_pdev->bridge_d3; + amdgpu_atpx_get_quirks(pdev); } if (has_atpx && vga_count == 2) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 057e1ecd83cec5746319adb88602eb95111104c6..a5df80d50d447a5dc0dd3a9a29c8ad21677e485c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -93,7 +93,7 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev) resource_size_t size = 256 * 1024; /* ??? */ if (!(adev->flags & AMD_IS_APU)) - if (amdgpu_need_post(adev)) + if (amdgpu_device_need_post(adev)) return false; adev->bios = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 85d2149b9dbeef43bb6e9ae3e54ae0c3240d74ec..4466f3535e2d95ffe8e4b2241741f7d795768437 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -801,6 +801,11 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, else strcpy(fw_name, "amdgpu/vega10_smc.bin"); break; + case CHIP_CARRIZO: + case CHIP_STONEY: + case CHIP_RAVEN: + adev->pm.fw_version = info->version; + return 0; default: DRM_ERROR("SMC firmware not supported\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 4cea9ab237ac3cd36813a610b5623746932a1a24..e80fc38141b57c5d00b0221fc2cd7c66dfe275dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -343,7 +343,12 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct ttm_operation_ctx ctx = { true, false }; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + .allow_reserved_eviction = false, + .resv = bo->tbo.resv + }; uint32_t domain; int r; @@ -773,10 +778,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) struct amdgpu_bo *bo; int i, r; - r = amdgpu_vm_update_directories(adev, vm); - if (r) - return r; - r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; @@ -834,6 +835,10 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) if (r) return r; + r = amdgpu_vm_update_directories(adev, vm); + if (r) + return r; + r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false); if (r) return r; @@ -1150,7 +1155,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_ring *ring = p->job->ring; - struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; + struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity; struct amdgpu_job *job; unsigned i; uint64_t seq; @@ -1173,7 +1178,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job = p->job; p->job = NULL; - r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp); + r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp); if (r) { amdgpu_job_free(job); amdgpu_mn_unlock(p->mn); @@ -1202,7 +1207,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_ring_priority_get(job->ring, job->base.s_priority); trace_amdgpu_cs_ioctl(job); - amd_sched_entity_push_job(&job->base, entity); + drm_sched_entity_push_job(&job->base, entity); ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); amdgpu_mn_unlock(p->mn); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index d71dc164b4693b5b0e67405eaee5990a0e4af5e1..09d35051fdd68689ac00d69504765166e738076f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -28,10 +28,10 @@ #include "amdgpu_sched.h" static int amdgpu_ctx_priority_permit(struct drm_file *filp, - enum amd_sched_priority priority) + enum drm_sched_priority priority) { /* NORMAL and below are accessible by everyone */ - if (priority <= AMD_SCHED_PRIORITY_NORMAL) + if (priority <= DRM_SCHED_PRIORITY_NORMAL) return 0; if (capable(CAP_SYS_NICE)) @@ -44,14 +44,14 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp, } static int amdgpu_ctx_init(struct amdgpu_device *adev, - enum amd_sched_priority priority, + enum drm_sched_priority priority, struct drm_file *filp, struct amdgpu_ctx *ctx) { unsigned i, j; int r; - if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) return -EINVAL; r = amdgpu_ctx_priority_permit(filp, priority); @@ -78,19 +78,19 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, ctx->reset_counter_query = ctx->reset_counter; ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); ctx->init_priority = priority; - ctx->override_priority = AMD_SCHED_PRIORITY_UNSET; + ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; /* create context entity for each ring */ for (i = 0; i < adev->num_rings; i++) { struct amdgpu_ring *ring = adev->rings[i]; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; rq = &ring->sched.sched_rq[priority]; if (ring == &adev->gfx.kiq.ring) continue; - r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity, + r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity, rq, amdgpu_sched_jobs, &ctx->guilty); if (r) goto failed; @@ -104,7 +104,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, failed: for (j = 0; j < i; j++) - amd_sched_entity_fini(&adev->rings[j]->sched, + drm_sched_entity_fini(&adev->rings[j]->sched, &ctx->rings[j].entity); kfree(ctx->fences); ctx->fences = NULL; @@ -126,7 +126,7 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) ctx->fences = NULL; for (i = 0; i < adev->num_rings; i++) - amd_sched_entity_fini(&adev->rings[i]->sched, + drm_sched_entity_fini(&adev->rings[i]->sched, &ctx->rings[i].entity); amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); @@ -137,7 +137,7 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) static int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, struct drm_file *filp, - enum amd_sched_priority priority, + enum drm_sched_priority priority, uint32_t *id) { struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; @@ -266,7 +266,7 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, { int r; uint32_t id; - enum amd_sched_priority priority; + enum drm_sched_priority priority; union drm_amdgpu_ctx *args = data; struct amdgpu_device *adev = dev->dev_private; @@ -278,8 +278,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, /* For backwards compatibility reasons, we need to accept * ioctls with garbage in the priority field */ - if (priority == AMD_SCHED_PRIORITY_INVALID) - priority = AMD_SCHED_PRIORITY_NORMAL; + if (priority == DRM_SCHED_PRIORITY_INVALID) + priority = DRM_SCHED_PRIORITY_NORMAL; switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: @@ -385,18 +385,18 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, } void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, - enum amd_sched_priority priority) + enum drm_sched_priority priority) { int i; struct amdgpu_device *adev = ctx->adev; - struct amd_sched_rq *rq; - struct amd_sched_entity *entity; + struct drm_sched_rq *rq; + struct drm_sched_entity *entity; struct amdgpu_ring *ring; - enum amd_sched_priority ctx_prio; + enum drm_sched_priority ctx_prio; ctx->override_priority = priority; - ctx_prio = (ctx->override_priority == AMD_SCHED_PRIORITY_UNSET) ? + ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ? ctx->init_priority : ctx->override_priority; for (i = 0; i < adev->num_rings; i++) { @@ -407,7 +407,7 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) continue; - amd_sched_entity_set_rq(entity, rq); + drm_sched_entity_set_rq(entity, rq); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c new file mode 100644 index 0000000000000000000000000000000000000000..ee76b468774a313739b202273959ce6161fa34ab --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -0,0 +1,792 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include +#include "amdgpu.h" + +/* + * Debugfs + */ +int amdgpu_debugfs_add_files(struct amdgpu_device *adev, + const struct drm_info_list *files, + unsigned nfiles) +{ + unsigned i; + + for (i = 0; i < adev->debugfs_count; i++) { + if (adev->debugfs[i].files == files) { + /* Already registered */ + return 0; + } + } + + i = adev->debugfs_count + 1; + if (i > AMDGPU_DEBUGFS_MAX_COMPONENTS) { + DRM_ERROR("Reached maximum number of debugfs components.\n"); + DRM_ERROR("Report so we increase " + "AMDGPU_DEBUGFS_MAX_COMPONENTS.\n"); + return -EINVAL; + } + adev->debugfs[adev->debugfs_count].files = files; + adev->debugfs[adev->debugfs_count].num_files = nfiles; + adev->debugfs_count = i; +#if defined(CONFIG_DEBUG_FS) + drm_debugfs_create_files(files, nfiles, + adev->ddev->primary->debugfs_root, + adev->ddev->primary); +#endif + return 0; +} + +#if defined(CONFIG_DEBUG_FS) + +static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + bool pm_pg_lock, use_bank; + unsigned instance_bank, sh_bank, se_bank; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + /* are we reading registers for which a PG lock is necessary? */ + pm_pg_lock = (*pos >> 23) & 1; + + if (*pos & (1ULL << 62)) { + se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24; + sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34; + instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44; + + if (se_bank == 0x3FF) + se_bank = 0xFFFFFFFF; + if (sh_bank == 0x3FF) + sh_bank = 0xFFFFFFFF; + if (instance_bank == 0x3FF) + instance_bank = 0xFFFFFFFF; + use_bank = 1; + } else { + use_bank = 0; + } + + *pos &= (1UL << 22) - 1; + + if (use_bank) { + if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || + (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) + return -EINVAL; + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, se_bank, + sh_bank, instance_bank); + } + + if (pm_pg_lock) + mutex_lock(&adev->pm.mutex); + + while (size) { + uint32_t value; + + if (*pos > adev->rmmio_size) + goto end; + + value = RREG32(*pos >> 2); + r = put_user(value, (uint32_t *)buf); + if (r) { + result = r; + goto end; + } + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + +end: + if (use_bank) { + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + } + + if (pm_pg_lock) + mutex_unlock(&adev->pm.mutex); + + return result; +} + +static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + bool pm_pg_lock, use_bank; + unsigned instance_bank, sh_bank, se_bank; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + /* are we reading registers for which a PG lock is necessary? */ + pm_pg_lock = (*pos >> 23) & 1; + + if (*pos & (1ULL << 62)) { + se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24; + sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34; + instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44; + + if (se_bank == 0x3FF) + se_bank = 0xFFFFFFFF; + if (sh_bank == 0x3FF) + sh_bank = 0xFFFFFFFF; + if (instance_bank == 0x3FF) + instance_bank = 0xFFFFFFFF; + use_bank = 1; + } else { + use_bank = 0; + } + + *pos &= (1UL << 22) - 1; + + if (use_bank) { + if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || + (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) + return -EINVAL; + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, se_bank, + sh_bank, instance_bank); + } + + if (pm_pg_lock) + mutex_lock(&adev->pm.mutex); + + while (size) { + uint32_t value; + + if (*pos > adev->rmmio_size) + return result; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32(*pos >> 2, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + if (use_bank) { + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + } + + if (pm_pg_lock) + mutex_unlock(&adev->pm.mutex); + + return result; +} + +static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + value = RREG32_PCIE(*pos >> 2); + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32_PCIE(*pos >> 2, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + value = RREG32_DIDT(*pos >> 2); + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32_DIDT(*pos >> 2, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + value = RREG32_SMC(*pos); + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32_SMC(*pos, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + uint32_t *config, no_regs = 0; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + config = kmalloc_array(256, sizeof(*config), GFP_KERNEL); + if (!config) + return -ENOMEM; + + /* version, increment each time something is added */ + config[no_regs++] = 3; + config[no_regs++] = adev->gfx.config.max_shader_engines; + config[no_regs++] = adev->gfx.config.max_tile_pipes; + config[no_regs++] = adev->gfx.config.max_cu_per_sh; + config[no_regs++] = adev->gfx.config.max_sh_per_se; + config[no_regs++] = adev->gfx.config.max_backends_per_se; + config[no_regs++] = adev->gfx.config.max_texture_channel_caches; + config[no_regs++] = adev->gfx.config.max_gprs; + config[no_regs++] = adev->gfx.config.max_gs_threads; + config[no_regs++] = adev->gfx.config.max_hw_contexts; + config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend; + config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend; + config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size; + config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size; + config[no_regs++] = adev->gfx.config.num_tile_pipes; + config[no_regs++] = adev->gfx.config.backend_enable_mask; + config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes; + config[no_regs++] = adev->gfx.config.mem_row_size_in_kb; + config[no_regs++] = adev->gfx.config.shader_engine_tile_size; + config[no_regs++] = adev->gfx.config.num_gpus; + config[no_regs++] = adev->gfx.config.multi_gpu_tile_size; + config[no_regs++] = adev->gfx.config.mc_arb_ramcfg; + config[no_regs++] = adev->gfx.config.gb_addr_config; + config[no_regs++] = adev->gfx.config.num_rbs; + + /* rev==1 */ + config[no_regs++] = adev->rev_id; + config[no_regs++] = adev->pg_flags; + config[no_regs++] = adev->cg_flags; + + /* rev==2 */ + config[no_regs++] = adev->family; + config[no_regs++] = adev->external_rev_id; + + /* rev==3 */ + config[no_regs++] = adev->pdev->device; + config[no_regs++] = adev->pdev->revision; + config[no_regs++] = adev->pdev->subsystem_device; + config[no_regs++] = adev->pdev->subsystem_vendor; + + while (size && (*pos < no_regs * 4)) { + uint32_t value; + + value = config[*pos >> 2]; + r = put_user(value, (uint32_t *)buf); + if (r) { + kfree(config); + return r; + } + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + kfree(config); + return result; +} + +static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + int idx, x, outsize, r, valuesize; + uint32_t values[16]; + + if (size & 3 || *pos & 0x3) + return -EINVAL; + + if (amdgpu_dpm == 0) + return -EINVAL; + + /* convert offset to sensor number */ + idx = *pos >> 2; + + valuesize = sizeof(values); + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor) + r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); + else + return -EINVAL; + + if (size > valuesize) + return -EINVAL; + + outsize = 0; + x = 0; + if (!r) { + while (size) { + r = put_user(values[x++], (int32_t *)buf); + buf += 4; + size -= 4; + outsize += 4; + } + } + + return !r ? outsize : r; +} + +static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + int r, x; + ssize_t result=0; + uint32_t offset, se, sh, cu, wave, simd, data[32]; + + if (size & 3 || *pos & 3) + return -EINVAL; + + /* decode offset */ + offset = (*pos & GENMASK_ULL(6, 0)); + se = (*pos & GENMASK_ULL(14, 7)) >> 7; + sh = (*pos & GENMASK_ULL(22, 15)) >> 15; + cu = (*pos & GENMASK_ULL(30, 23)) >> 23; + wave = (*pos & GENMASK_ULL(36, 31)) >> 31; + simd = (*pos & GENMASK_ULL(44, 37)) >> 37; + + /* switch to the specific se/sh/cu */ + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, se, sh, cu); + + x = 0; + if (adev->gfx.funcs->read_wave_data) + adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x); + + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + mutex_unlock(&adev->grbm_idx_mutex); + + if (!x) + return -EINVAL; + + while (size && (offset < x * 4)) { + uint32_t value; + + value = data[offset >> 2]; + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + offset += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + int r; + ssize_t result = 0; + uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data; + + if (size & 3 || *pos & 3) + return -EINVAL; + + /* decode offset */ + offset = *pos & GENMASK_ULL(11, 0); + se = (*pos & GENMASK_ULL(19, 12)) >> 12; + sh = (*pos & GENMASK_ULL(27, 20)) >> 20; + cu = (*pos & GENMASK_ULL(35, 28)) >> 28; + wave = (*pos & GENMASK_ULL(43, 36)) >> 36; + simd = (*pos & GENMASK_ULL(51, 44)) >> 44; + thread = (*pos & GENMASK_ULL(59, 52)) >> 52; + bank = (*pos & GENMASK_ULL(61, 60)) >> 60; + + data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* switch to the specific se/sh/cu */ + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, se, sh, cu); + + if (bank == 0) { + if (adev->gfx.funcs->read_wave_vgprs) + adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data); + } else { + if (adev->gfx.funcs->read_wave_sgprs) + adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data); + } + + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + mutex_unlock(&adev->grbm_idx_mutex); + + while (size) { + uint32_t value; + + value = data[offset++]; + r = put_user(value, (uint32_t *)buf); + if (r) { + result = r; + goto err; + } + + result += 4; + buf += 4; + size -= 4; + } + +err: + kfree(data); + return result; +} + +static const struct file_operations amdgpu_debugfs_regs_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_read, + .write = amdgpu_debugfs_regs_write, + .llseek = default_llseek +}; +static const struct file_operations amdgpu_debugfs_regs_didt_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_didt_read, + .write = amdgpu_debugfs_regs_didt_write, + .llseek = default_llseek +}; +static const struct file_operations amdgpu_debugfs_regs_pcie_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_pcie_read, + .write = amdgpu_debugfs_regs_pcie_write, + .llseek = default_llseek +}; +static const struct file_operations amdgpu_debugfs_regs_smc_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_smc_read, + .write = amdgpu_debugfs_regs_smc_write, + .llseek = default_llseek +}; + +static const struct file_operations amdgpu_debugfs_gca_config_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gca_config_read, + .llseek = default_llseek +}; + +static const struct file_operations amdgpu_debugfs_sensors_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_sensor_read, + .llseek = default_llseek +}; + +static const struct file_operations amdgpu_debugfs_wave_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_wave_read, + .llseek = default_llseek +}; +static const struct file_operations amdgpu_debugfs_gpr_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gpr_read, + .llseek = default_llseek +}; + +static const struct file_operations *debugfs_regs[] = { + &amdgpu_debugfs_regs_fops, + &amdgpu_debugfs_regs_didt_fops, + &amdgpu_debugfs_regs_pcie_fops, + &amdgpu_debugfs_regs_smc_fops, + &amdgpu_debugfs_gca_config_fops, + &amdgpu_debugfs_sensors_fops, + &amdgpu_debugfs_wave_fops, + &amdgpu_debugfs_gpr_fops, +}; + +static const char *debugfs_regs_names[] = { + "amdgpu_regs", + "amdgpu_regs_didt", + "amdgpu_regs_pcie", + "amdgpu_regs_smc", + "amdgpu_gca_config", + "amdgpu_sensors", + "amdgpu_wave", + "amdgpu_gpr", +}; + +int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) +{ + struct drm_minor *minor = adev->ddev->primary; + struct dentry *ent, *root = minor->debugfs_root; + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { + ent = debugfs_create_file(debugfs_regs_names[i], + S_IFREG | S_IRUGO, root, + adev, debugfs_regs[i]); + if (IS_ERR(ent)) { + for (j = 0; j < i; j++) { + debugfs_remove(adev->debugfs_regs[i]); + adev->debugfs_regs[i] = NULL; + } + return PTR_ERR(ent); + } + + if (!i) + i_size_write(ent->d_inode, adev->rmmio_size); + adev->debugfs_regs[i] = ent; + } + + return 0; +} + +void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) +{ + unsigned i; + + for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { + if (adev->debugfs_regs[i]) { + debugfs_remove(adev->debugfs_regs[i]); + adev->debugfs_regs[i] = NULL; + } + } +} + +static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + int r = 0, i; + + /* hold on the scheduler */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + kthread_park(ring->sched.thread); + } + + seq_printf(m, "run ib test:\n"); + r = amdgpu_ib_ring_tests(adev); + if (r) + seq_printf(m, "ib ring tests failed (%d).\n", r); + else + seq_printf(m, "ib ring tests passed.\n"); + + /* go on the scheduler */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + kthread_unpark(ring->sched.thread); + } + + return 0; +} + +static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + + seq_write(m, adev->bios, adev->bios_size); + return 0; +} + +static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + + seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev)); + return 0; +} + +static const struct drm_info_list amdgpu_debugfs_list[] = { + {"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump}, + {"amdgpu_test_ib", &amdgpu_debugfs_test_ib}, + {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram} +}; + +int amdgpu_debugfs_init(struct amdgpu_device *adev) +{ + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list, + ARRAY_SIZE(amdgpu_debugfs_list)); +} + +#else +int amdgpu_debugfs_init(struct amdgpu_device *adev) +{ + return 0; +} +int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) +{ + return 0; +} +void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { } +#endif diff --git a/drivers/gpu/drm/amd/display/dc/basics/grph_object_id.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h similarity index 50% rename from drivers/gpu/drm/amd/display/dc/basics/grph_object_id.c rename to drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index 147822545252ed129c694aa5e7bf870aab4955f2..8260d8073c2688c67bbe2018807bc9957a3f6749 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/grph_object_id.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -1,5 +1,7 @@ /* - * Copyright 2012-15 Advanced Micro Devices, Inc. + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -19,57 +21,22 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * - * Authors: AMD - * */ -#include "dm_services.h" -#include "include/grph_object_id.h" - -static bool dal_graphics_object_id_is_valid(struct graphics_object_id id) -{ - bool rc = true; - - switch (id.type) { - case OBJECT_TYPE_UNKNOWN: - rc = false; - break; - case OBJECT_TYPE_GPU: - case OBJECT_TYPE_ENGINE: - /* do NOT check for id.id == 0 */ - if (id.enum_id == ENUM_ID_UNKNOWN) - rc = false; - break; - default: - if (id.id == 0 || id.enum_id == ENUM_ID_UNKNOWN) - rc = false; - break; - } - - return rc; -} - -bool dal_graphics_object_id_is_equal( - struct graphics_object_id id1, - struct graphics_object_id id2) -{ - if (false == dal_graphics_object_id_is_valid(id1)) { - dm_output_to_console( - "%s: Warning: comparing invalid object 'id1'!\n", __func__); - return false; - } - - if (false == dal_graphics_object_id_is_valid(id2)) { - dm_output_to_console( - "%s: Warning: comparing invalid object 'id2'!\n", __func__); - return false; - } - - if (id1.id == id2.id && id1.enum_id == id2.enum_id - && id1.type == id2.type) - return true; - - return false; -} - - +/* + * Debugfs + */ +struct amdgpu_debugfs { + const struct drm_info_list *files; + unsigned num_files; +}; + +int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); +void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); +int amdgpu_debugfs_init(struct amdgpu_device *adev); +int amdgpu_debugfs_add_files(struct amdgpu_device *adev, + const struct drm_info_list *files, + unsigned nfiles); +int amdgpu_debugfs_fence_init(struct amdgpu_device *adev); +int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); +int amdgpu_debugfs_gem_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 70c9e5756b02a66fdda95f5340ad860337ec1328..00a50cc5ec9a31a77b7476020e5f47089c83c5de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -64,11 +63,6 @@ MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 -static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); -static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); -static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev); -static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev); - static const char *amdgpu_asic_name[] = { "TAHITI", "PITCAIRN", @@ -333,7 +327,7 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, BUG(); } -static int amdgpu_vram_scratch_init(struct amdgpu_device *adev) +static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev) { return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, @@ -342,13 +336,13 @@ static int amdgpu_vram_scratch_init(struct amdgpu_device *adev) (void **)&adev->vram_scratch.ptr); } -static void amdgpu_vram_scratch_fini(struct amdgpu_device *adev) +static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL); } /** - * amdgpu_program_register_sequence - program an array of registers. + * amdgpu_device_program_register_sequence - program an array of registers. * * @adev: amdgpu_device pointer * @registers: pointer to the register array @@ -357,9 +351,9 @@ static void amdgpu_vram_scratch_fini(struct amdgpu_device *adev) * Programs an array or registers with and and or masks. * This is a helper for setting golden registers. */ -void amdgpu_program_register_sequence(struct amdgpu_device *adev, - const u32 *registers, - const u32 array_size) +void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, + const u32 *registers, + const u32 array_size) { u32 tmp, reg, and_mask, or_mask; int i; @@ -383,7 +377,7 @@ void amdgpu_program_register_sequence(struct amdgpu_device *adev, } } -void amdgpu_pci_config_reset(struct amdgpu_device *adev) +void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) { pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA); } @@ -392,14 +386,14 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev) * GPU doorbell aperture helpers function. */ /** - * amdgpu_doorbell_init - Init doorbell driver information. + * amdgpu_device_doorbell_init - Init doorbell driver information. * * @adev: amdgpu_device pointer * * Init doorbell driver information (CIK) * Returns 0 on success, error on failure. */ -static int amdgpu_doorbell_init(struct amdgpu_device *adev) +static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) { /* No doorbell on SI hardware generation */ if (adev->asic_type < CHIP_BONAIRE) { @@ -432,66 +426,35 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev) } /** - * amdgpu_doorbell_fini - Tear down doorbell driver information. + * amdgpu_device_doorbell_fini - Tear down doorbell driver information. * * @adev: amdgpu_device pointer * * Tear down doorbell driver information (CIK) */ -static void amdgpu_doorbell_fini(struct amdgpu_device *adev) +static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev) { iounmap(adev->doorbell.ptr); adev->doorbell.ptr = NULL; } -/** - * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to - * setup amdkfd - * - * @adev: amdgpu_device pointer - * @aperture_base: output returning doorbell aperture base physical address - * @aperture_size: output returning doorbell aperture size in bytes - * @start_offset: output returning # of doorbell bytes reserved for amdgpu. - * - * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up, - * takes doorbells required for its own rings and reports the setup to amdkfd. - * amdgpu reserved doorbells are at the start of the doorbell aperture. - */ -void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, - phys_addr_t *aperture_base, - size_t *aperture_size, - size_t *start_offset) -{ - /* - * The first num_doorbells are used by amdgpu. - * amdkfd takes whatever's left in the aperture. - */ - if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) { - *aperture_base = adev->doorbell.base; - *aperture_size = adev->doorbell.size; - *start_offset = adev->doorbell.num_doorbells * sizeof(u32); - } else { - *aperture_base = 0; - *aperture_size = 0; - *start_offset = 0; - } -} + /* - * amdgpu_wb_*() + * amdgpu_device_wb_*() * Writeback is the method by which the GPU updates special pages in memory * with the status of certain GPU events (fences, ring pointers,etc.). */ /** - * amdgpu_wb_fini - Disable Writeback and free memory + * amdgpu_device_wb_fini - Disable Writeback and free memory * * @adev: amdgpu_device pointer * * Disables Writeback and frees the Writeback memory (all asics). * Used at driver shutdown. */ -static void amdgpu_wb_fini(struct amdgpu_device *adev) +static void amdgpu_device_wb_fini(struct amdgpu_device *adev) { if (adev->wb.wb_obj) { amdgpu_bo_free_kernel(&adev->wb.wb_obj, @@ -502,7 +465,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev) } /** - * amdgpu_wb_init- Init Writeback driver info and allocate memory + * amdgpu_device_wb_init- Init Writeback driver info and allocate memory * * @adev: amdgpu_device pointer * @@ -510,7 +473,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev) * Used at driver startup. * Returns 0 on success or an -error on failure. */ -static int amdgpu_wb_init(struct amdgpu_device *adev) +static int amdgpu_device_wb_init(struct amdgpu_device *adev) { int r; @@ -536,7 +499,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) } /** - * amdgpu_wb_get - Allocate a wb entry + * amdgpu_device_wb_get - Allocate a wb entry * * @adev: amdgpu_device pointer * @wb: wb index @@ -544,7 +507,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) * Allocate a wb slot for use by the driver (all asics). * Returns 0 on success or -EINVAL on failure. */ -int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb) +int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) { unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); @@ -558,21 +521,21 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb) } /** - * amdgpu_wb_free - Free a wb entry + * amdgpu_device_wb_free - Free a wb entry * * @adev: amdgpu_device pointer * @wb: wb index * * Free a wb slot allocated for use by the driver (all asics) */ -void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb) +void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) { if (wb < adev->wb.num_wb) __clear_bit(wb >> 3, adev->wb.used); } /** - * amdgpu_vram_location - try to find VRAM location + * amdgpu_device_vram_location - try to find VRAM location * @adev: amdgpu device structure holding all necessary informations * @mc: memory controller structure holding memory informations * @base: base address at which to put VRAM @@ -580,7 +543,8 @@ void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb) * Function will try to place VRAM at base address provided * as parameter. */ -void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base) +void amdgpu_device_vram_location(struct amdgpu_device *adev, + struct amdgpu_mc *mc, u64 base) { uint64_t limit = (uint64_t)amdgpu_vram_limit << 20; @@ -594,7 +558,7 @@ void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 } /** - * amdgpu_gart_location - try to find GTT location + * amdgpu_device_gart_location - try to find GTT location * @adev: amdgpu device structure holding all necessary informations * @mc: memory controller structure holding memory informations * @@ -605,7 +569,8 @@ void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 * * FIXME: when reducing GTT size align new size on power of 2. */ -void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) +void amdgpu_device_gart_location(struct amdgpu_device *adev, + struct amdgpu_mc *mc) { u64 size_af, size_bf; @@ -632,101 +597,6 @@ void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) mc->gart_size >> 20, mc->gart_start, mc->gart_end); } -/* - * Firmware Reservation functions - */ -/** - * amdgpu_fw_reserve_vram_fini - free fw reserved vram - * - * @adev: amdgpu_device pointer - * - * free fw reserved vram if it has been reserved. - */ -void amdgpu_fw_reserve_vram_fini(struct amdgpu_device *adev) -{ - amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo, - NULL, &adev->fw_vram_usage.va); -} - -/** - * amdgpu_fw_reserve_vram_init - create bo vram reservation from fw - * - * @adev: amdgpu_device pointer - * - * create bo vram reservation from fw. - */ -int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev) -{ - struct ttm_operation_ctx ctx = { false, false }; - int r = 0; - int i; - u64 vram_size = adev->mc.visible_vram_size; - u64 offset = adev->fw_vram_usage.start_offset; - u64 size = adev->fw_vram_usage.size; - struct amdgpu_bo *bo; - - adev->fw_vram_usage.va = NULL; - adev->fw_vram_usage.reserved_bo = NULL; - - if (adev->fw_vram_usage.size > 0 && - adev->fw_vram_usage.size <= vram_size) { - - r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, - PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0, - &adev->fw_vram_usage.reserved_bo); - if (r) - goto error_create; - - r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); - if (r) - goto error_reserve; - - /* remove the original mem node and create a new one at the - * request position - */ - bo = adev->fw_vram_usage.reserved_bo; - offset = ALIGN(offset, PAGE_SIZE); - for (i = 0; i < bo->placement.num_placement; ++i) { - bo->placements[i].fpfn = offset >> PAGE_SHIFT; - bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; - } - - ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); - r = ttm_bo_mem_space(&bo->tbo, &bo->placement, - &bo->tbo.mem, &ctx); - if (r) - goto error_pin; - - r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, - AMDGPU_GEM_DOMAIN_VRAM, - adev->fw_vram_usage.start_offset, - (adev->fw_vram_usage.start_offset + - adev->fw_vram_usage.size), NULL); - if (r) - goto error_pin; - r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, - &adev->fw_vram_usage.va); - if (r) - goto error_kmap; - - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); - } - return r; - -error_kmap: - amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); -error_pin: - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); -error_reserve: - amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); -error_create: - adev->fw_vram_usage.va = NULL; - adev->fw_vram_usage.reserved_bo = NULL; - return r; -} - /** * amdgpu_device_resize_fb_bar - try to resize FB BAR * @@ -756,7 +626,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) root = root->parent; pci_bus_for_each_resource(root, res, i) { - if (res && res->flags & IORESOURCE_MEM_64 && + if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && res->start > 0x100000000ull) break; } @@ -771,7 +641,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) cmd & ~PCI_COMMAND_MEMORY); /* Free the VRAM and doorbell BAR, we most likely need to move both. */ - amdgpu_doorbell_fini(adev); + amdgpu_device_doorbell_fini(adev); if (adev->asic_type >= CHIP_BONAIRE) pci_release_resource(adev->pdev, 2); @@ -788,7 +658,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) /* When the doorbell or fb BAR isn't available we have no chance of * using the device. */ - r = amdgpu_doorbell_init(adev); + r = amdgpu_device_doorbell_init(adev); if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET)) return -ENODEV; @@ -801,7 +671,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) * GPU helpers function. */ /** - * amdgpu_need_post - check if the hw need post or not + * amdgpu_device_need_post - check if the hw need post or not * * @adev: amdgpu_device pointer * @@ -809,7 +679,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) * or post is needed if hw reset is performed. * Returns true if need or false if not. */ -bool amdgpu_need_post(struct amdgpu_device *adev) +bool amdgpu_device_need_post(struct amdgpu_device *adev) { uint32_t reg; @@ -854,285 +724,9 @@ bool amdgpu_need_post(struct amdgpu_device *adev) return true; } -/** - * amdgpu_dummy_page_init - init dummy page used by the driver - * - * @adev: amdgpu_device pointer - * - * Allocate the dummy page used by the driver (all asics). - * This dummy page is used by the driver as a filler for gart entries - * when pages are taken out of the GART - * Returns 0 on sucess, -ENOMEM on failure. - */ -int amdgpu_dummy_page_init(struct amdgpu_device *adev) -{ - if (adev->dummy_page.page) - return 0; - adev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO); - if (adev->dummy_page.page == NULL) - return -ENOMEM; - adev->dummy_page.addr = pci_map_page(adev->pdev, adev->dummy_page.page, - 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(adev->pdev, adev->dummy_page.addr)) { - dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); - __free_page(adev->dummy_page.page); - adev->dummy_page.page = NULL; - return -ENOMEM; - } - return 0; -} - -/** - * amdgpu_dummy_page_fini - free dummy page used by the driver - * - * @adev: amdgpu_device pointer - * - * Frees the dummy page used by the driver (all asics). - */ -void amdgpu_dummy_page_fini(struct amdgpu_device *adev) -{ - if (adev->dummy_page.page == NULL) - return; - pci_unmap_page(adev->pdev, adev->dummy_page.addr, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - __free_page(adev->dummy_page.page); - adev->dummy_page.page = NULL; -} - - -/* ATOM accessor methods */ -/* - * ATOM is an interpreted byte code stored in tables in the vbios. The - * driver registers callbacks to access registers and the interpreter - * in the driver parses the tables and executes then to program specific - * actions (set display modes, asic init, etc.). See amdgpu_atombios.c, - * atombios.h, and atom.c - */ - -/** - * cail_pll_read - read PLL register - * - * @info: atom card_info pointer - * @reg: PLL register offset - * - * Provides a PLL register accessor for the atom interpreter (r4xx+). - * Returns the value of the PLL register. - */ -static uint32_t cail_pll_read(struct card_info *info, uint32_t reg) -{ - return 0; -} - -/** - * cail_pll_write - write PLL register - * - * @info: atom card_info pointer - * @reg: PLL register offset - * @val: value to write to the pll register - * - * Provides a PLL register accessor for the atom interpreter (r4xx+). - */ -static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val) -{ - -} - -/** - * cail_mc_read - read MC (Memory Controller) register - * - * @info: atom card_info pointer - * @reg: MC register offset - * - * Provides an MC register accessor for the atom interpreter (r4xx+). - * Returns the value of the MC register. - */ -static uint32_t cail_mc_read(struct card_info *info, uint32_t reg) -{ - return 0; -} - -/** - * cail_mc_write - write MC (Memory Controller) register - * - * @info: atom card_info pointer - * @reg: MC register offset - * @val: value to write to the pll register - * - * Provides a MC register accessor for the atom interpreter (r4xx+). - */ -static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val) -{ - -} - -/** - * cail_reg_write - write MMIO register - * - * @info: atom card_info pointer - * @reg: MMIO register offset - * @val: value to write to the pll register - * - * Provides a MMIO register accessor for the atom interpreter (r4xx+). - */ -static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val) -{ - struct amdgpu_device *adev = info->dev->dev_private; - - WREG32(reg, val); -} - -/** - * cail_reg_read - read MMIO register - * - * @info: atom card_info pointer - * @reg: MMIO register offset - * - * Provides an MMIO register accessor for the atom interpreter (r4xx+). - * Returns the value of the MMIO register. - */ -static uint32_t cail_reg_read(struct card_info *info, uint32_t reg) -{ - struct amdgpu_device *adev = info->dev->dev_private; - uint32_t r; - - r = RREG32(reg); - return r; -} - -/** - * cail_ioreg_write - write IO register - * - * @info: atom card_info pointer - * @reg: IO register offset - * @val: value to write to the pll register - * - * Provides a IO register accessor for the atom interpreter (r4xx+). - */ -static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val) -{ - struct amdgpu_device *adev = info->dev->dev_private; - - WREG32_IO(reg, val); -} - -/** - * cail_ioreg_read - read IO register - * - * @info: atom card_info pointer - * @reg: IO register offset - * - * Provides an IO register accessor for the atom interpreter (r4xx+). - * Returns the value of the IO register. - */ -static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg) -{ - struct amdgpu_device *adev = info->dev->dev_private; - uint32_t r; - - r = RREG32_IO(reg); - return r; -} - -static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct amdgpu_device *adev = ddev->dev_private; - struct atom_context *ctx = adev->mode_info.atom_context; - - return snprintf(buf, PAGE_SIZE, "%s\n", ctx->vbios_version); -} - -static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version, - NULL); - -/** - * amdgpu_atombios_fini - free the driver info and callbacks for atombios - * - * @adev: amdgpu_device pointer - * - * Frees the driver info and register access callbacks for the ATOM - * interpreter (r4xx+). - * Called at driver shutdown. - */ -static void amdgpu_atombios_fini(struct amdgpu_device *adev) -{ - if (adev->mode_info.atom_context) { - kfree(adev->mode_info.atom_context->scratch); - kfree(adev->mode_info.atom_context->iio); - } - kfree(adev->mode_info.atom_context); - adev->mode_info.atom_context = NULL; - kfree(adev->mode_info.atom_card_info); - adev->mode_info.atom_card_info = NULL; - device_remove_file(adev->dev, &dev_attr_vbios_version); -} - -/** - * amdgpu_atombios_init - init the driver info and callbacks for atombios - * - * @adev: amdgpu_device pointer - * - * Initializes the driver info and register access callbacks for the - * ATOM interpreter (r4xx+). - * Returns 0 on sucess, -ENOMEM on failure. - * Called at driver startup. - */ -static int amdgpu_atombios_init(struct amdgpu_device *adev) -{ - struct card_info *atom_card_info = - kzalloc(sizeof(struct card_info), GFP_KERNEL); - int ret; - - if (!atom_card_info) - return -ENOMEM; - - adev->mode_info.atom_card_info = atom_card_info; - atom_card_info->dev = adev->ddev; - atom_card_info->reg_read = cail_reg_read; - atom_card_info->reg_write = cail_reg_write; - /* needed for iio ops */ - if (adev->rio_mem) { - atom_card_info->ioreg_read = cail_ioreg_read; - atom_card_info->ioreg_write = cail_ioreg_write; - } else { - DRM_DEBUG("PCI I/O BAR is not found. Using MMIO to access ATOM BIOS\n"); - atom_card_info->ioreg_read = cail_reg_read; - atom_card_info->ioreg_write = cail_reg_write; - } - atom_card_info->mc_read = cail_mc_read; - atom_card_info->mc_write = cail_mc_write; - atom_card_info->pll_read = cail_pll_read; - atom_card_info->pll_write = cail_pll_write; - - adev->mode_info.atom_context = amdgpu_atom_parse(atom_card_info, adev->bios); - if (!adev->mode_info.atom_context) { - amdgpu_atombios_fini(adev); - return -ENOMEM; - } - - mutex_init(&adev->mode_info.atom_context->mutex); - if (adev->is_atom_fw) { - amdgpu_atomfirmware_scratch_regs_init(adev); - amdgpu_atomfirmware_allocate_fb_scratch(adev); - } else { - amdgpu_atombios_scratch_regs_init(adev); - amdgpu_atombios_allocate_fb_scratch(adev); - } - - ret = device_create_file(adev->dev, &dev_attr_vbios_version); - if (ret) { - DRM_ERROR("Failed to create device file for VBIOS version\n"); - return ret; - } - - return 0; -} - /* if we get transitioned to only one device, take VGA back */ /** - * amdgpu_vga_set_decode - enable/disable vga decode + * amdgpu_device_vga_set_decode - enable/disable vga decode * * @cookie: amdgpu_device pointer * @state: enable/disable vga decode @@ -1140,7 +734,7 @@ static int amdgpu_atombios_init(struct amdgpu_device *adev) * Enable/disable vga decode (all asics). * Returns VGA resource flags. */ -static unsigned int amdgpu_vga_set_decode(void *cookie, bool state) +static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state) { struct amdgpu_device *adev = cookie; amdgpu_asic_set_vga_state(adev, state); @@ -1151,7 +745,7 @@ static unsigned int amdgpu_vga_set_decode(void *cookie, bool state) return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; } -static void amdgpu_check_block_size(struct amdgpu_device *adev) +static void amdgpu_device_check_block_size(struct amdgpu_device *adev) { /* defines number of bits in page table versus page directory, * a page is 4KB so we have 12 bits offset, minimum 9 bits in the @@ -1166,7 +760,7 @@ static void amdgpu_check_block_size(struct amdgpu_device *adev) } } -static void amdgpu_check_vm_size(struct amdgpu_device *adev) +static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) { /* no need to check the default value */ if (amdgpu_vm_size == -1) @@ -1180,14 +774,14 @@ static void amdgpu_check_vm_size(struct amdgpu_device *adev) } /** - * amdgpu_check_arguments - validate module params + * amdgpu_device_check_arguments - validate module params * * @adev: amdgpu_device pointer * * Validates certain module parameters and updates * the associated values used by the driver (all asics). */ -static void amdgpu_check_arguments(struct amdgpu_device *adev) +static void amdgpu_device_check_arguments(struct amdgpu_device *adev) { if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", @@ -1220,9 +814,9 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) amdgpu_vm_fragment_size = -1; } - amdgpu_check_vm_size(adev); + amdgpu_device_check_vm_size(adev); - amdgpu_check_block_size(adev); + amdgpu_device_check_block_size(adev); if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 || !is_power_of_2(amdgpu_vram_page_split))) { @@ -1230,6 +824,11 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) amdgpu_vram_page_split); amdgpu_vram_page_split = 1024; } + + if (amdgpu_lockup_timeout == 0) { + dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n"); + amdgpu_lockup_timeout = 10000; + } } /** @@ -1293,9 +892,9 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { .can_switch = amdgpu_switcheroo_can_switch, }; -int amdgpu_set_clockgating_state(struct amdgpu_device *adev, - enum amd_ip_block_type block_type, - enum amd_clockgating_state state) +int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, + enum amd_ip_block_type block_type, + enum amd_clockgating_state state) { int i, r = 0; @@ -1315,9 +914,9 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev, return r; } -int amdgpu_set_powergating_state(struct amdgpu_device *adev, - enum amd_ip_block_type block_type, - enum amd_powergating_state state) +int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, + enum amd_ip_block_type block_type, + enum amd_powergating_state state) { int i, r = 0; @@ -1337,7 +936,8 @@ int amdgpu_set_powergating_state(struct amdgpu_device *adev, return r; } -void amdgpu_get_clockgating_state(struct amdgpu_device *adev, u32 *flags) +void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) { int i; @@ -1349,8 +949,8 @@ void amdgpu_get_clockgating_state(struct amdgpu_device *adev, u32 *flags) } } -int amdgpu_wait_for_idle(struct amdgpu_device *adev, - enum amd_ip_block_type block_type) +int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, + enum amd_ip_block_type block_type) { int i, r; @@ -1368,8 +968,8 @@ int amdgpu_wait_for_idle(struct amdgpu_device *adev, } -bool amdgpu_is_idle(struct amdgpu_device *adev, - enum amd_ip_block_type block_type) +bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, + enum amd_ip_block_type block_type) { int i; @@ -1383,8 +983,9 @@ bool amdgpu_is_idle(struct amdgpu_device *adev, } -struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev, - enum amd_ip_block_type type) +struct amdgpu_ip_block * +amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, + enum amd_ip_block_type type) { int i; @@ -1396,7 +997,7 @@ struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev, } /** - * amdgpu_ip_block_version_cmp + * amdgpu_device_ip_block_version_cmp * * @adev: amdgpu_device pointer * @type: enum amd_ip_block_type @@ -1406,11 +1007,11 @@ struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev, * return 0 if equal or greater * return 1 if smaller or the ip_block doesn't exist */ -int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev, - enum amd_ip_block_type type, - u32 major, u32 minor) +int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, + enum amd_ip_block_type type, + u32 major, u32 minor) { - struct amdgpu_ip_block *ip_block = amdgpu_get_ip_block(adev, type); + struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type); if (ip_block && ((ip_block->version->major > major) || ((ip_block->version->major == major) && @@ -1421,7 +1022,7 @@ int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev, } /** - * amdgpu_ip_block_add + * amdgpu_device_ip_block_add * * @adev: amdgpu_device pointer * @ip_block_version: pointer to the IP to add @@ -1429,8 +1030,8 @@ int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev, * Adds the IP block driver information to the collection of IPs * on the asic. */ -int amdgpu_ip_block_add(struct amdgpu_device *adev, - const struct amdgpu_ip_block_version *ip_block_version) +int amdgpu_device_ip_block_add(struct amdgpu_device *adev, + const struct amdgpu_ip_block_version *ip_block_version) { if (!ip_block_version) return -EINVAL; @@ -1586,7 +1187,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) return err; } -static int amdgpu_early_init(struct amdgpu_device *adev) +static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { int i, r; @@ -1695,7 +1296,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return 0; } -static int amdgpu_init(struct amdgpu_device *adev) +static int amdgpu_device_ip_init(struct amdgpu_device *adev) { int i, r; @@ -1711,7 +1312,7 @@ static int amdgpu_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.sw = true; /* need to do gmc hw init early so we can allocate gpu mem */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { - r = amdgpu_vram_scratch_init(adev); + r = amdgpu_device_vram_scratch_init(adev); if (r) { DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r); return r; @@ -1721,9 +1322,9 @@ static int amdgpu_init(struct amdgpu_device *adev) DRM_ERROR("hw_init %d failed %d\n", i, r); return r; } - r = amdgpu_wb_init(adev); + r = amdgpu_device_wb_init(adev); if (r) { - DRM_ERROR("amdgpu_wb_init failed %d\n", r); + DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); return r; } adev->ip_blocks[i].status.hw = true; @@ -1762,18 +1363,18 @@ static int amdgpu_init(struct amdgpu_device *adev) return 0; } -static void amdgpu_fill_reset_magic(struct amdgpu_device *adev) +static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) { memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); } -static bool amdgpu_check_vram_lost(struct amdgpu_device *adev) +static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) { return !!memcmp(adev->gart.ptr, adev->reset_magic, AMDGPU_RESET_MAGIC_NUM); } -static int amdgpu_late_set_cg_state(struct amdgpu_device *adev) +static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) { int i = 0, r; @@ -1796,7 +1397,7 @@ static int amdgpu_late_set_cg_state(struct amdgpu_device *adev) return 0; } -static int amdgpu_late_init(struct amdgpu_device *adev) +static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) { int i = 0, r; @@ -1817,12 +1418,12 @@ static int amdgpu_late_init(struct amdgpu_device *adev) mod_delayed_work(system_wq, &adev->late_init_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); - amdgpu_fill_reset_magic(adev); + amdgpu_device_fill_reset_magic(adev); return 0; } -static int amdgpu_fini(struct amdgpu_device *adev) +static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; @@ -1856,8 +1457,8 @@ static int amdgpu_fini(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { amdgpu_free_static_csa(adev); - amdgpu_wb_fini(adev); - amdgpu_vram_scratch_fini(adev); + amdgpu_device_wb_fini(adev); + amdgpu_device_vram_scratch_fini(adev); } if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && @@ -1910,14 +1511,14 @@ static int amdgpu_fini(struct amdgpu_device *adev) return 0; } -static void amdgpu_late_init_func_handler(struct work_struct *work) +static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, late_init_work.work); - amdgpu_late_set_cg_state(adev); + amdgpu_device_ip_late_set_cg_state(adev); } -int amdgpu_suspend(struct amdgpu_device *adev) +int amdgpu_device_ip_suspend(struct amdgpu_device *adev) { int i, r; @@ -1925,10 +1526,10 @@ int amdgpu_suspend(struct amdgpu_device *adev) amdgpu_virt_request_full_gpu(adev, false); /* ungate SMC block first */ - r = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, - AMD_CG_STATE_UNGATE); + r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, + AMD_CG_STATE_UNGATE); if (r) { - DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n",r); + DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r); } for (i = adev->num_ip_blocks - 1; i >= 0; i--) { @@ -1958,7 +1559,7 @@ int amdgpu_suspend(struct amdgpu_device *adev) return 0; } -static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev) +static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) { int i, r; @@ -1987,7 +1588,7 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev) return 0; } -static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev) +static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) { int i, r; @@ -2020,7 +1621,7 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev) return 0; } -static int amdgpu_resume_phase1(struct amdgpu_device *adev) +static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) { int i, r; @@ -2043,7 +1644,7 @@ static int amdgpu_resume_phase1(struct amdgpu_device *adev) return 0; } -static int amdgpu_resume_phase2(struct amdgpu_device *adev) +static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) { int i, r; @@ -2065,14 +1666,14 @@ static int amdgpu_resume_phase2(struct amdgpu_device *adev) return 0; } -static int amdgpu_resume(struct amdgpu_device *adev) +static int amdgpu_device_ip_resume(struct amdgpu_device *adev) { int r; - r = amdgpu_resume_phase1(adev); + r = amdgpu_device_ip_resume_phase1(adev); if (r) return r; - r = amdgpu_resume_phase2(adev); + r = amdgpu_device_ip_resume_phase2(adev); return r; } @@ -2211,7 +1812,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, hash_init(adev->mn_hash); mutex_init(&adev->lock_reset); - amdgpu_check_arguments(adev); + amdgpu_device_check_arguments(adev); spin_lock_init(&adev->mmio_idx_lock); spin_lock_init(&adev->smc_idx_lock); @@ -2229,7 +1830,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->ring_lru_list); spin_lock_init(&adev->ring_lru_list_lock); - INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler); + INIT_DELAYED_WORK(&adev->late_init_work, + amdgpu_device_ip_late_init_func_handler); /* Registers mapping */ /* TODO: block userspace mapping of io register */ @@ -2249,7 +1851,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); /* doorbell bar mapping */ - amdgpu_doorbell_init(adev); + amdgpu_device_doorbell_init(adev); /* io port mapping */ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { @@ -2263,17 +1865,15 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("PCI I/O BAR is not found.\n"); /* early init functions */ - r = amdgpu_early_init(adev); + r = amdgpu_device_ip_early_init(adev); if (r) return r; /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ /* this will fail for cards that aren't VGA class devices, just * ignore it */ - vga_client_register(adev->pdev, adev, NULL, amdgpu_vga_set_decode); + vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); - if (amdgpu_runtime_pm == 1) - runtime = true; if (amdgpu_device_is_px(ddev)) runtime = true; if (!pci_is_thunderbolt_attached(adev->pdev)) @@ -2299,7 +1899,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_device_detect_sriov_bios(adev); /* Post card if necessary */ - if (amdgpu_need_post(adev)) { + if (amdgpu_device_need_post(adev)) { if (!adev->bios) { dev_err(adev->dev, "no vBIOS found\n"); r = -EINVAL; @@ -2345,7 +1945,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* init the mode config */ drm_mode_config_init(adev->ddev); - r = amdgpu_init(adev); + r = amdgpu_device_ip_init(adev); if (r) { /* failed in exclusive mode due to timeout */ if (amdgpu_sriov_vf(adev) && @@ -2359,9 +1959,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = -EAGAIN; goto failed; } - dev_err(adev->dev, "amdgpu_init failed\n"); + dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); - amdgpu_fini(adev); + amdgpu_device_ip_fini(adev); goto failed; } @@ -2397,7 +1997,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) DRM_ERROR("registering pm debugfs failed (%d).\n", r); - r = amdgpu_gem_debugfs_init(adev); + r = amdgpu_debugfs_gem_init(adev); if (r) DRM_ERROR("registering gem debugfs failed (%d).\n", r); @@ -2405,17 +2005,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) DRM_ERROR("registering register debugfs failed (%d).\n", r); - r = amdgpu_debugfs_test_ib_ring_init(adev); - if (r) - DRM_ERROR("registering register test ib ring debugfs failed (%d).\n", r); - r = amdgpu_debugfs_firmware_init(adev); if (r) DRM_ERROR("registering firmware debugfs failed (%d).\n", r); - r = amdgpu_debugfs_vbios_dump_init(adev); + r = amdgpu_debugfs_init(adev); if (r) - DRM_ERROR("Creating vbios dump debugfs failed (%d).\n", r); + DRM_ERROR("Creating debugfs files failed (%d).\n", r); if ((amdgpu_testing & 1)) { if (adev->accel_working) @@ -2433,9 +2029,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* enable clockgating, etc. after ib tests, etc. since some blocks require * explicit gating rather than handling it automatically. */ - r = amdgpu_late_init(adev); + r = amdgpu_device_ip_late_init(adev); if (r) { - dev_err(adev->dev, "amdgpu_late_init failed\n"); + dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); goto failed; } @@ -2466,12 +2062,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->shutdown = true; if (adev->mode_info.mode_config_initialized) drm_crtc_force_disable_all(adev->ddev); - /* evict vram memory */ - amdgpu_bo_evict_vram(adev); + amdgpu_ib_pool_fini(adev); amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); - r = amdgpu_fini(adev); + r = amdgpu_device_ip_fini(adev); if (adev->firmware.gpu_info_fw) { release_firmware(adev->firmware.gpu_info_fw); adev->firmware.gpu_info_fw = NULL; @@ -2494,7 +2089,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->rio_mem = NULL; iounmap(adev->rmmio); adev->rmmio = NULL; - amdgpu_doorbell_fini(adev); + amdgpu_device_doorbell_fini(adev); amdgpu_pm_sysfs_fini(adev); amdgpu_debugfs_regs_cleanup(adev); } @@ -2575,7 +2170,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) amdgpu_fence_driver_suspend(adev); - r = amdgpu_suspend(adev); + r = amdgpu_device_ip_suspend(adev); /* evict remaining vram memory * This second call to evict vram is to evict the gart page table @@ -2583,7 +2178,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) */ amdgpu_bo_evict_vram(adev); - amdgpu_atombios_scratch_regs_save(adev); pci_save_state(dev->pdev); if (suspend) { /* Shut down the device */ @@ -2632,18 +2226,17 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) if (r) goto unlock; } - amdgpu_atombios_scratch_regs_restore(adev); /* post card */ - if (amdgpu_need_post(adev)) { + if (amdgpu_device_need_post(adev)) { r = amdgpu_atom_asic_init(adev->mode_info.atom_context); if (r) DRM_ERROR("amdgpu asic init failed\n"); } - r = amdgpu_resume(adev); + r = amdgpu_device_ip_resume(adev); if (r) { - DRM_ERROR("amdgpu_resume failed (%d).\n", r); + DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r); goto unlock; } amdgpu_fence_driver_resume(adev); @@ -2654,7 +2247,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) DRM_ERROR("ib ring test failed (%d).\n", r); } - r = amdgpu_late_init(adev); + r = amdgpu_device_ip_late_init(adev); if (r) goto unlock; @@ -2734,7 +2327,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) return r; } -static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) +static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) { int i; bool asic_hang = false; @@ -2756,7 +2349,7 @@ static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) return asic_hang; } -static int amdgpu_pre_soft_reset(struct amdgpu_device *adev) +static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) { int i, r = 0; @@ -2774,7 +2367,7 @@ static int amdgpu_pre_soft_reset(struct amdgpu_device *adev) return 0; } -static bool amdgpu_need_full_reset(struct amdgpu_device *adev) +static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) { int i; @@ -2795,7 +2388,7 @@ static bool amdgpu_need_full_reset(struct amdgpu_device *adev) return false; } -static int amdgpu_soft_reset(struct amdgpu_device *adev) +static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) { int i, r = 0; @@ -2813,7 +2406,7 @@ static int amdgpu_soft_reset(struct amdgpu_device *adev) return 0; } -static int amdgpu_post_soft_reset(struct amdgpu_device *adev) +static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) { int i, r = 0; @@ -2830,18 +2423,10 @@ static int amdgpu_post_soft_reset(struct amdgpu_device *adev) return 0; } -bool amdgpu_need_backup(struct amdgpu_device *adev) -{ - if (adev->flags & AMD_IS_APU) - return false; - - return amdgpu_lockup_timeout > 0 ? true : false; -} - -static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct dma_fence **fence) +static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_bo *bo, + struct dma_fence **fence) { uint32_t domain; int r; @@ -2874,7 +2459,7 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, } /* - * amdgpu_reset - reset ASIC/GPU for bare-metal or passthrough + * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough * * @adev: amdgpu device pointer * @reset_flags: output param tells caller the reset result @@ -2882,18 +2467,19 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, * attempt to do soft-reset or full-reset and reinitialize Asic * return 0 means successed otherwise failed */ -static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags) +static int amdgpu_device_reset(struct amdgpu_device *adev, + uint64_t* reset_flags) { bool need_full_reset, vram_lost = 0; int r; - need_full_reset = amdgpu_need_full_reset(adev); + need_full_reset = amdgpu_device_ip_need_full_reset(adev); if (!need_full_reset) { - amdgpu_pre_soft_reset(adev); - r = amdgpu_soft_reset(adev); - amdgpu_post_soft_reset(adev); - if (r || amdgpu_check_soft_reset(adev)) { + amdgpu_device_ip_pre_soft_reset(adev); + r = amdgpu_device_ip_soft_reset(adev); + amdgpu_device_ip_post_soft_reset(adev); + if (r || amdgpu_device_ip_check_soft_reset(adev)) { DRM_INFO("soft reset failed, will fallback to full reset!\n"); need_full_reset = true; } @@ -2901,22 +2487,20 @@ static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags) } if (need_full_reset) { - r = amdgpu_suspend(adev); + r = amdgpu_device_ip_suspend(adev); retry: - amdgpu_atombios_scratch_regs_save(adev); r = amdgpu_asic_reset(adev); - amdgpu_atombios_scratch_regs_restore(adev); /* post card */ amdgpu_atom_asic_init(adev->mode_info.atom_context); if (!r) { dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); - r = amdgpu_resume_phase1(adev); + r = amdgpu_device_ip_resume_phase1(adev); if (r) goto out; - vram_lost = amdgpu_check_vram_lost(adev); + vram_lost = amdgpu_device_check_vram_lost(adev); if (vram_lost) { DRM_ERROR("VRAM is lost!\n"); atomic_inc(&adev->vram_lost_counter); @@ -2927,12 +2511,12 @@ static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags) if (r) goto out; - r = amdgpu_resume_phase2(adev); + r = amdgpu_device_ip_resume_phase2(adev); if (r) goto out; if (vram_lost) - amdgpu_fill_reset_magic(adev); + amdgpu_device_fill_reset_magic(adev); } } @@ -2942,7 +2526,7 @@ static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags) r = amdgpu_ib_ring_tests(adev); if (r) { dev_err(adev->dev, "ib ring test failed (%d).\n", r); - r = amdgpu_suspend(adev); + r = amdgpu_device_ip_suspend(adev); need_full_reset = true; goto retry; } @@ -2960,7 +2544,7 @@ static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags) } /* - * amdgpu_reset_sriov - reset ASIC for SR-IOV vf + * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * * @adev: amdgpu device pointer * @reset_flags: output param tells caller the reset result @@ -2968,7 +2552,9 @@ static int amdgpu_reset(struct amdgpu_device *adev, uint64_t* reset_flags) * do VF FLR and reinitialize Asic * return 0 means successed otherwise failed */ -static int amdgpu_reset_sriov(struct amdgpu_device *adev, uint64_t *reset_flags, bool from_hypervisor) +static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, + uint64_t *reset_flags, + bool from_hypervisor) { int r; @@ -2980,7 +2566,7 @@ static int amdgpu_reset_sriov(struct amdgpu_device *adev, uint64_t *reset_flags, return r; /* Resume IP prior to SMC */ - r = amdgpu_sriov_reinit_early(adev); + r = amdgpu_device_ip_reinit_early_sriov(adev); if (r) goto error; @@ -2988,7 +2574,7 @@ static int amdgpu_reset_sriov(struct amdgpu_device *adev, uint64_t *reset_flags, amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]); /* now we are okay to resume SMC/CP/SDMA */ - r = amdgpu_sriov_reinit_late(adev); + r = amdgpu_device_ip_reinit_late_sriov(adev); if (r) goto error; @@ -3015,25 +2601,33 @@ static int amdgpu_reset_sriov(struct amdgpu_device *adev, uint64_t *reset_flags, } /** - * amdgpu_gpu_recover - reset the asic and recover scheduler + * amdgpu_device_gpu_recover - reset the asic and recover scheduler * * @adev: amdgpu device pointer * @job: which job trigger hang + * @force forces reset regardless of amdgpu_gpu_recovery * * Attempt to reset the GPU if it has hung (all asics). * Returns 0 for success or an error on failure. */ -int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job *job, bool force) { struct drm_atomic_state *state = NULL; uint64_t reset_flags = 0; int i, r, resched; - if (!amdgpu_check_soft_reset(adev)) { + if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); return 0; } + if (!force && (amdgpu_gpu_recovery == 0 || + (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) { + DRM_INFO("GPU recovery disabled.\n"); + return 0; + } + dev_info(adev->dev, "GPU reset begin!\n"); mutex_lock(&adev->lock_reset); @@ -3058,16 +2652,16 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) continue; kthread_park(ring->sched.thread); - amd_sched_hw_job_reset(&ring->sched, &job->base); + drm_sched_hw_job_reset(&ring->sched, &job->base); /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); } if (amdgpu_sriov_vf(adev)) - r = amdgpu_reset_sriov(adev, &reset_flags, job ? false : true); + r = amdgpu_device_reset_sriov(adev, &reset_flags, job ? false : true); else - r = amdgpu_reset(adev, &reset_flags); + r = amdgpu_device_reset(adev, &reset_flags); if (!r) { if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) || @@ -3080,7 +2674,7 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) mutex_lock(&adev->shadow_list_lock); list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { next = NULL; - amdgpu_recover_vram_from_shadow(adev, ring, bo, &next); + amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next); if (fence) { r = dma_fence_wait(fence, false); if (r) { @@ -3111,7 +2705,7 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) if (job && job->ring->idx != i) continue; - amd_sched_job_recovery(&ring->sched); + drm_sched_job_recovery(&ring->sched); kthread_unpark(ring->sched.thread); } } else { @@ -3153,7 +2747,7 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) return r; } -void amdgpu_get_pcie_info(struct amdgpu_device *adev) +void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) { u32 mask; int ret; @@ -3245,773 +2839,3 @@ void amdgpu_get_pcie_info(struct amdgpu_device *adev) } } -/* - * Debugfs - */ -int amdgpu_debugfs_add_files(struct amdgpu_device *adev, - const struct drm_info_list *files, - unsigned nfiles) -{ - unsigned i; - - for (i = 0; i < adev->debugfs_count; i++) { - if (adev->debugfs[i].files == files) { - /* Already registered */ - return 0; - } - } - - i = adev->debugfs_count + 1; - if (i > AMDGPU_DEBUGFS_MAX_COMPONENTS) { - DRM_ERROR("Reached maximum number of debugfs components.\n"); - DRM_ERROR("Report so we increase " - "AMDGPU_DEBUGFS_MAX_COMPONENTS.\n"); - return -EINVAL; - } - adev->debugfs[adev->debugfs_count].files = files; - adev->debugfs[adev->debugfs_count].num_files = nfiles; - adev->debugfs_count = i; -#if defined(CONFIG_DEBUG_FS) - drm_debugfs_create_files(files, nfiles, - adev->ddev->primary->debugfs_root, - adev->ddev->primary); -#endif - return 0; -} - -#if defined(CONFIG_DEBUG_FS) - -static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - bool pm_pg_lock, use_bank; - unsigned instance_bank, sh_bank, se_bank; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - /* are we reading registers for which a PG lock is necessary? */ - pm_pg_lock = (*pos >> 23) & 1; - - if (*pos & (1ULL << 62)) { - se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24; - sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34; - instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44; - - if (se_bank == 0x3FF) - se_bank = 0xFFFFFFFF; - if (sh_bank == 0x3FF) - sh_bank = 0xFFFFFFFF; - if (instance_bank == 0x3FF) - instance_bank = 0xFFFFFFFF; - use_bank = 1; - } else { - use_bank = 0; - } - - *pos &= (1UL << 22) - 1; - - if (use_bank) { - if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || - (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) - return -EINVAL; - mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se_bank, - sh_bank, instance_bank); - } - - if (pm_pg_lock) - mutex_lock(&adev->pm.mutex); - - while (size) { - uint32_t value; - - if (*pos > adev->rmmio_size) - goto end; - - value = RREG32(*pos >> 2); - r = put_user(value, (uint32_t *)buf); - if (r) { - result = r; - goto end; - } - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - -end: - if (use_bank) { - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); - } - - if (pm_pg_lock) - mutex_unlock(&adev->pm.mutex); - - return result; -} - -static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - bool pm_pg_lock, use_bank; - unsigned instance_bank, sh_bank, se_bank; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - /* are we reading registers for which a PG lock is necessary? */ - pm_pg_lock = (*pos >> 23) & 1; - - if (*pos & (1ULL << 62)) { - se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24; - sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34; - instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44; - - if (se_bank == 0x3FF) - se_bank = 0xFFFFFFFF; - if (sh_bank == 0x3FF) - sh_bank = 0xFFFFFFFF; - if (instance_bank == 0x3FF) - instance_bank = 0xFFFFFFFF; - use_bank = 1; - } else { - use_bank = 0; - } - - *pos &= (1UL << 22) - 1; - - if (use_bank) { - if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || - (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) - return -EINVAL; - mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se_bank, - sh_bank, instance_bank); - } - - if (pm_pg_lock) - mutex_lock(&adev->pm.mutex); - - while (size) { - uint32_t value; - - if (*pos > adev->rmmio_size) - return result; - - r = get_user(value, (uint32_t *)buf); - if (r) - return r; - - WREG32(*pos >> 2, value); - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - if (use_bank) { - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); - } - - if (pm_pg_lock) - mutex_unlock(&adev->pm.mutex); - - return result; -} - -static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - while (size) { - uint32_t value; - - value = RREG32_PCIE(*pos >> 2); - r = put_user(value, (uint32_t *)buf); - if (r) - return r; - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - while (size) { - uint32_t value; - - r = get_user(value, (uint32_t *)buf); - if (r) - return r; - - WREG32_PCIE(*pos >> 2, value); - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - while (size) { - uint32_t value; - - value = RREG32_DIDT(*pos >> 2); - r = put_user(value, (uint32_t *)buf); - if (r) - return r; - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - while (size) { - uint32_t value; - - r = get_user(value, (uint32_t *)buf); - if (r) - return r; - - WREG32_DIDT(*pos >> 2, value); - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - while (size) { - uint32_t value; - - value = RREG32_SMC(*pos); - r = put_user(value, (uint32_t *)buf); - if (r) - return r; - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - while (size) { - uint32_t value; - - r = get_user(value, (uint32_t *)buf); - if (r) - return r; - - WREG32_SMC(*pos, value); - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - uint32_t *config, no_regs = 0; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - config = kmalloc_array(256, sizeof(*config), GFP_KERNEL); - if (!config) - return -ENOMEM; - - /* version, increment each time something is added */ - config[no_regs++] = 3; - config[no_regs++] = adev->gfx.config.max_shader_engines; - config[no_regs++] = adev->gfx.config.max_tile_pipes; - config[no_regs++] = adev->gfx.config.max_cu_per_sh; - config[no_regs++] = adev->gfx.config.max_sh_per_se; - config[no_regs++] = adev->gfx.config.max_backends_per_se; - config[no_regs++] = adev->gfx.config.max_texture_channel_caches; - config[no_regs++] = adev->gfx.config.max_gprs; - config[no_regs++] = adev->gfx.config.max_gs_threads; - config[no_regs++] = adev->gfx.config.max_hw_contexts; - config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend; - config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend; - config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size; - config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size; - config[no_regs++] = adev->gfx.config.num_tile_pipes; - config[no_regs++] = adev->gfx.config.backend_enable_mask; - config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes; - config[no_regs++] = adev->gfx.config.mem_row_size_in_kb; - config[no_regs++] = adev->gfx.config.shader_engine_tile_size; - config[no_regs++] = adev->gfx.config.num_gpus; - config[no_regs++] = adev->gfx.config.multi_gpu_tile_size; - config[no_regs++] = adev->gfx.config.mc_arb_ramcfg; - config[no_regs++] = adev->gfx.config.gb_addr_config; - config[no_regs++] = adev->gfx.config.num_rbs; - - /* rev==1 */ - config[no_regs++] = adev->rev_id; - config[no_regs++] = adev->pg_flags; - config[no_regs++] = adev->cg_flags; - - /* rev==2 */ - config[no_regs++] = adev->family; - config[no_regs++] = adev->external_rev_id; - - /* rev==3 */ - config[no_regs++] = adev->pdev->device; - config[no_regs++] = adev->pdev->revision; - config[no_regs++] = adev->pdev->subsystem_device; - config[no_regs++] = adev->pdev->subsystem_vendor; - - while (size && (*pos < no_regs * 4)) { - uint32_t value; - - value = config[*pos >> 2]; - r = put_user(value, (uint32_t *)buf); - if (r) { - kfree(config); - return r; - } - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - kfree(config); - return result; -} - -static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - int idx, x, outsize, r, valuesize; - uint32_t values[16]; - - if (size & 3 || *pos & 0x3) - return -EINVAL; - - if (amdgpu_dpm == 0) - return -EINVAL; - - /* convert offset to sensor number */ - idx = *pos >> 2; - - valuesize = sizeof(values); - if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor) - r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); - else - return -EINVAL; - - if (size > valuesize) - return -EINVAL; - - outsize = 0; - x = 0; - if (!r) { - while (size) { - r = put_user(values[x++], (int32_t *)buf); - buf += 4; - size -= 4; - outsize += 4; - } - } - - return !r ? outsize : r; -} - -static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = f->f_inode->i_private; - int r, x; - ssize_t result=0; - uint32_t offset, se, sh, cu, wave, simd, data[32]; - - if (size & 3 || *pos & 3) - return -EINVAL; - - /* decode offset */ - offset = (*pos & GENMASK_ULL(6, 0)); - se = (*pos & GENMASK_ULL(14, 7)) >> 7; - sh = (*pos & GENMASK_ULL(22, 15)) >> 15; - cu = (*pos & GENMASK_ULL(30, 23)) >> 23; - wave = (*pos & GENMASK_ULL(36, 31)) >> 31; - simd = (*pos & GENMASK_ULL(44, 37)) >> 37; - - /* switch to the specific se/sh/cu */ - mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se, sh, cu); - - x = 0; - if (adev->gfx.funcs->read_wave_data) - adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x); - - amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - mutex_unlock(&adev->grbm_idx_mutex); - - if (!x) - return -EINVAL; - - while (size && (offset < x * 4)) { - uint32_t value; - - value = data[offset >> 2]; - r = put_user(value, (uint32_t *)buf); - if (r) - return r; - - result += 4; - buf += 4; - offset += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = f->f_inode->i_private; - int r; - ssize_t result = 0; - uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data; - - if (size & 3 || *pos & 3) - return -EINVAL; - - /* decode offset */ - offset = *pos & GENMASK_ULL(11, 0); - se = (*pos & GENMASK_ULL(19, 12)) >> 12; - sh = (*pos & GENMASK_ULL(27, 20)) >> 20; - cu = (*pos & GENMASK_ULL(35, 28)) >> 28; - wave = (*pos & GENMASK_ULL(43, 36)) >> 36; - simd = (*pos & GENMASK_ULL(51, 44)) >> 44; - thread = (*pos & GENMASK_ULL(59, 52)) >> 52; - bank = (*pos & GENMASK_ULL(61, 60)) >> 60; - - data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - /* switch to the specific se/sh/cu */ - mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se, sh, cu); - - if (bank == 0) { - if (adev->gfx.funcs->read_wave_vgprs) - adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data); - } else { - if (adev->gfx.funcs->read_wave_sgprs) - adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data); - } - - amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - mutex_unlock(&adev->grbm_idx_mutex); - - while (size) { - uint32_t value; - - value = data[offset++]; - r = put_user(value, (uint32_t *)buf); - if (r) { - result = r; - goto err; - } - - result += 4; - buf += 4; - size -= 4; - } - -err: - kfree(data); - return result; -} - -static const struct file_operations amdgpu_debugfs_regs_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_regs_read, - .write = amdgpu_debugfs_regs_write, - .llseek = default_llseek -}; -static const struct file_operations amdgpu_debugfs_regs_didt_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_regs_didt_read, - .write = amdgpu_debugfs_regs_didt_write, - .llseek = default_llseek -}; -static const struct file_operations amdgpu_debugfs_regs_pcie_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_regs_pcie_read, - .write = amdgpu_debugfs_regs_pcie_write, - .llseek = default_llseek -}; -static const struct file_operations amdgpu_debugfs_regs_smc_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_regs_smc_read, - .write = amdgpu_debugfs_regs_smc_write, - .llseek = default_llseek -}; - -static const struct file_operations amdgpu_debugfs_gca_config_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_gca_config_read, - .llseek = default_llseek -}; - -static const struct file_operations amdgpu_debugfs_sensors_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_sensor_read, - .llseek = default_llseek -}; - -static const struct file_operations amdgpu_debugfs_wave_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_wave_read, - .llseek = default_llseek -}; -static const struct file_operations amdgpu_debugfs_gpr_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_gpr_read, - .llseek = default_llseek -}; - -static const struct file_operations *debugfs_regs[] = { - &amdgpu_debugfs_regs_fops, - &amdgpu_debugfs_regs_didt_fops, - &amdgpu_debugfs_regs_pcie_fops, - &amdgpu_debugfs_regs_smc_fops, - &amdgpu_debugfs_gca_config_fops, - &amdgpu_debugfs_sensors_fops, - &amdgpu_debugfs_wave_fops, - &amdgpu_debugfs_gpr_fops, -}; - -static const char *debugfs_regs_names[] = { - "amdgpu_regs", - "amdgpu_regs_didt", - "amdgpu_regs_pcie", - "amdgpu_regs_smc", - "amdgpu_gca_config", - "amdgpu_sensors", - "amdgpu_wave", - "amdgpu_gpr", -}; - -static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) -{ - struct drm_minor *minor = adev->ddev->primary; - struct dentry *ent, *root = minor->debugfs_root; - unsigned i, j; - - for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { - ent = debugfs_create_file(debugfs_regs_names[i], - S_IFREG | S_IRUGO, root, - adev, debugfs_regs[i]); - if (IS_ERR(ent)) { - for (j = 0; j < i; j++) { - debugfs_remove(adev->debugfs_regs[i]); - adev->debugfs_regs[i] = NULL; - } - return PTR_ERR(ent); - } - - if (!i) - i_size_write(ent->d_inode, adev->rmmio_size); - adev->debugfs_regs[i] = ent; - } - - return 0; -} - -static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) -{ - unsigned i; - - for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { - if (adev->debugfs_regs[i]) { - debugfs_remove(adev->debugfs_regs[i]); - adev->debugfs_regs[i] = NULL; - } - } -} - -static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) -{ - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct amdgpu_device *adev = dev->dev_private; - int r = 0, i; - - /* hold on the scheduler */ - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!ring || !ring->sched.thread) - continue; - kthread_park(ring->sched.thread); - } - - seq_printf(m, "run ib test:\n"); - r = amdgpu_ib_ring_tests(adev); - if (r) - seq_printf(m, "ib ring tests failed (%d).\n", r); - else - seq_printf(m, "ib ring tests passed.\n"); - - /* go on the scheduler */ - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!ring || !ring->sched.thread) - continue; - kthread_unpark(ring->sched.thread); - } - - return 0; -} - -static const struct drm_info_list amdgpu_debugfs_test_ib_ring_list[] = { - {"amdgpu_test_ib", &amdgpu_debugfs_test_ib} -}; - -static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) -{ - return amdgpu_debugfs_add_files(adev, - amdgpu_debugfs_test_ib_ring_list, 1); -} - -int amdgpu_debugfs_init(struct drm_minor *minor) -{ - return 0; -} - -static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data) -{ - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct amdgpu_device *adev = dev->dev_private; - - seq_write(m, adev->bios, adev->bios_size); - return 0; -} - -static const struct drm_info_list amdgpu_vbios_dump_list[] = { - {"amdgpu_vbios", - amdgpu_debugfs_get_vbios_dump, - 0, NULL}, -}; - -static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev) -{ - return amdgpu_debugfs_add_files(adev, - amdgpu_vbios_dump_list, 1); -} -#else -static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) -{ - return 0; -} -static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) -{ - return 0; -} -static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev) -{ - return 0; -} -static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { } -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 31383e0049476153cd87d2dcfe2cf90805941751..50afcf65181a4e7f4ac5e5dc3c804a1af06864e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -90,7 +90,7 @@ int amdgpu_disp_priority = 0; int amdgpu_hw_i2c = 0; int amdgpu_pcie_gen2 = -1; int amdgpu_msi = -1; -int amdgpu_lockup_timeout = 0; +int amdgpu_lockup_timeout = 10000; int amdgpu_dpm = -1; int amdgpu_fw_load_type = -1; int amdgpu_aspm = -1; @@ -128,6 +128,7 @@ int amdgpu_param_buf_per_se = 0; int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; +int amdgpu_gpu_recovery = -1; /* auto */ MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -165,7 +166,7 @@ module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444); MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(msi, amdgpu_msi, int, 0444); -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default 0 = disable)"); +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)"); module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)"); @@ -280,6 +281,9 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444); MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); +MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto"); +module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); + #ifdef CONFIG_DRM_AMDGPU_SI #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) @@ -645,7 +649,7 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) * unfortunately we can't detect certain * hypervisors so just do this all the time. */ - amdgpu_suspend(adev); + amdgpu_device_ip_suspend(adev); } static int amdgpu_pmops_suspend(struct device *dev) @@ -850,9 +854,6 @@ static struct drm_driver kms_driver = { .disable_vblank = amdgpu_disable_vblank_kms, .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, .get_scanout_position = amdgpu_get_crtc_scanout_position, -#if defined(CONFIG_DEBUG_FS) - .debugfs_init = amdgpu_debugfs_init, -#endif .irq_preinstall = amdgpu_irq_preinstall, .irq_postinstall = amdgpu_irq_postinstall, .irq_uninstall = amdgpu_irq_uninstall, @@ -912,10 +913,6 @@ static int __init amdgpu_init(void) if (r) goto error_fence; - r = amd_sched_fence_slab_init(); - if (r) - goto error_sched; - if (vgacon_text_force()) { DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n"); return -EINVAL; @@ -928,9 +925,6 @@ static int __init amdgpu_init(void) /* let modprobe override vga console setting */ return pci_register_driver(pdriver); -error_sched: - amdgpu_fence_slab_fini(); - error_fence: amdgpu_sync_fini(); @@ -944,7 +938,6 @@ static void __exit amdgpu_exit(void) pci_unregister_driver(pdriver); amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); - amd_sched_fence_slab_fini(); amdgpu_fence_slab_fini(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 604ac03a42e4f3858d87ab2d53ca563bcb4e7b93..008e1984b7e39b9d51a9021c0df5762d889d6506 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -187,7 +187,7 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) seq = ++ring->fence_drv.sync_seq; amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, - seq, AMDGPU_FENCE_FLAG_INT); + seq, 0); *s = seq; @@ -410,7 +410,6 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, unsigned num_hw_submission) { - long timeout; int r; /* Check that num_hw_submission is a power of two */ @@ -434,20 +433,9 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, /* No need to setup the GPU scheduler for KIQ ring */ if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) { - timeout = msecs_to_jiffies(amdgpu_lockup_timeout); - if (timeout == 0) { - /* - * FIXME: - * Delayed workqueue cannot use it directly, - * so the scheduler will not use delayed workqueue if - * MAX_SCHEDULE_TIMEOUT is set. - * Currently keep it simple and silly. - */ - timeout = MAX_SCHEDULE_TIMEOUT; - } - r = amd_sched_init(&ring->sched, &amdgpu_sched_ops, + r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, num_hw_submission, amdgpu_job_hang_limit, - timeout, ring->name); + msecs_to_jiffies(amdgpu_lockup_timeout), ring->name); if (r) { DRM_ERROR("Failed to create scheduler on ring %s.\n", ring->name); @@ -503,7 +491,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) } amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); - amd_sched_fini(&ring->sched); + drm_sched_fini(&ring->sched); del_timer_sync(&ring->fence_drv.fallback_timer); for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) dma_fence_put(ring->fence_drv.fences[j]); @@ -705,7 +693,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; seq_printf(m, "gpu recover\n"); - amdgpu_gpu_recover(adev, NULL); + amdgpu_device_gpu_recover(adev, NULL, true); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 1f51897acc5b4d7bcf0b86fa7e2c53accff60d49..0a4f34afaaaa321dc2a0680a227660d8ecefd0d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -56,6 +56,51 @@ * Common GART table functions. */ +/** + * amdgpu_dummy_page_init - init dummy page used by the driver + * + * @adev: amdgpu_device pointer + * + * Allocate the dummy page used by the driver (all asics). + * This dummy page is used by the driver as a filler for gart entries + * when pages are taken out of the GART + * Returns 0 on sucess, -ENOMEM on failure. + */ +static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) +{ + if (adev->dummy_page.page) + return 0; + adev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO); + if (adev->dummy_page.page == NULL) + return -ENOMEM; + adev->dummy_page.addr = pci_map_page(adev->pdev, adev->dummy_page.page, + 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(adev->pdev, adev->dummy_page.addr)) { + dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); + __free_page(adev->dummy_page.page); + adev->dummy_page.page = NULL; + return -ENOMEM; + } + return 0; +} + +/** + * amdgpu_dummy_page_fini - free dummy page used by the driver + * + * @adev: amdgpu_device pointer + * + * Frees the dummy page used by the driver (all asics). + */ +static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) +{ + if (adev->dummy_page.page == NULL) + return; + pci_unmap_page(adev->pdev, adev->dummy_page.addr, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + __free_page(adev->dummy_page.page); + adev->dummy_page.page = NULL; +} + /** * amdgpu_gart_table_vram_alloc - allocate vram for gart page table * @@ -308,7 +353,7 @@ int amdgpu_gart_init(struct amdgpu_device *adev) DRM_ERROR("Page size is smaller than GPU page size!\n"); return -EINVAL; } - r = amdgpu_dummy_page_init(adev); + r = amdgpu_gart_dummy_page_init(adev); if (r) return r; /* Compute table size */ @@ -340,5 +385,5 @@ void amdgpu_gart_fini(struct amdgpu_device *adev) vfree(adev->gart.pages); adev->gart.pages = NULL; #endif - amdgpu_dummy_page_fini(adev); + amdgpu_gart_dummy_page_fini(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index eb75eb44efc681d8dc10789db64de687e963546a..e48b4ec88c8c72b84599d8f85b38836c92c26523 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -518,10 +518,6 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (!amdgpu_vm_ready(vm)) return; - r = amdgpu_vm_update_directories(adev, vm); - if (r) - goto error; - r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) goto error; @@ -530,6 +526,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, operation == AMDGPU_VA_OP_REPLACE) r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_update_directories(adev, vm); + if (r) + goto error; + error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); @@ -851,7 +851,7 @@ static const struct drm_info_list amdgpu_debugfs_gem_list[] = { }; #endif -int amdgpu_gem_debugfs_init(struct amdgpu_device *adev) +int amdgpu_debugfs_gem_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_gem_list, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ef043361009f4c2a8de0788beeed86aef288cd7a..bb40d2529a307eb9f71973e1d0628e3766da984c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -203,7 +203,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, spin_lock_init(&kiq->ring_lock); - r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); + r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs); if (r) return r; @@ -229,7 +229,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, struct amdgpu_irq_src *irq) { - amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); + amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs); amdgpu_ring_fini(ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 0cf86eb357d61aa73f008d07076f0fec926c6883..a162d87ca0c8e7bedf2d936f4cd63abf98fe534e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -149,7 +149,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return -EINVAL; } - if (vm && !job->vm_id) { + if (vm && !job->vmid) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } @@ -211,7 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ continue; - amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, + amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0, need_ctx_switch); need_ctx_switch = false; } @@ -229,9 +229,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, r = amdgpu_fence_emit(ring, f); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); - if (job && job->vm_id) - amdgpu_vm_reset_id(adev, ring->funcs->vmhub, - job->vm_id); + if (job && job->vmid) + amdgpu_vmid_reset(adev, ring->funcs->vmhub, job->vmid); amdgpu_ring_undo(ring); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c new file mode 100644 index 0000000000000000000000000000000000000000..16884a0b677b93892ff35672364836617fa12985 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -0,0 +1,459 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu_ids.h" + +#include +#include +#include + +#include "amdgpu.h" +#include "amdgpu_trace.h" + +/* + * PASID manager + * + * PASIDs are global address space identifiers that can be shared + * between the GPU, an IOMMU and the driver. VMs on different devices + * may use the same PASID if they share the same address + * space. Therefore PASIDs are allocated using a global IDA. VMs are + * looked up from the PASID per amdgpu_device. + */ +static DEFINE_IDA(amdgpu_pasid_ida); + +/** + * amdgpu_pasid_alloc - Allocate a PASID + * @bits: Maximum width of the PASID in bits, must be at least 1 + * + * Allocates a PASID of the given width while keeping smaller PASIDs + * available if possible. + * + * Returns a positive integer on success. Returns %-EINVAL if bits==0. + * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on + * memory allocation failure. + */ +int amdgpu_pasid_alloc(unsigned int bits) +{ + int pasid = -EINVAL; + + for (bits = min(bits, 31U); bits > 0; bits--) { + pasid = ida_simple_get(&amdgpu_pasid_ida, + 1U << (bits - 1), 1U << bits, + GFP_KERNEL); + if (pasid != -ENOSPC) + break; + } + + return pasid; +} + +/** + * amdgpu_pasid_free - Free a PASID + * @pasid: PASID to free + */ +void amdgpu_pasid_free(unsigned int pasid) +{ + ida_simple_remove(&amdgpu_pasid_ida, pasid); +} + +/* + * VMID manager + * + * VMIDs are a per VMHUB identifier for page tables handling. + */ + +/** + * amdgpu_vmid_had_gpu_reset - check if reset occured since last use + * + * @adev: amdgpu_device pointer + * @id: VMID structure + * + * Check if GPU reset occured since last use of the VMID. + */ +bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, + struct amdgpu_vmid *id) +{ + return id->current_gpu_reset_count != + atomic_read(&adev->gpu_reset_counter); +} + +/* idr_mgr->lock must be held */ +static int amdgpu_vmid_grab_reserved_locked(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct dma_fence *fence, + struct amdgpu_job *job) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + uint64_t fence_context = adev->fence_context + ring->idx; + struct amdgpu_vmid *id = vm->reserved_vmid[vmhub]; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct dma_fence *updates = sync->last_vm_update; + int r = 0; + struct dma_fence *flushed, *tmp; + bool needs_flush = vm->use_cpu_for_update; + + flushed = id->flushed_updates; + if ((amdgpu_vmid_had_gpu_reset(adev, id)) || + (atomic64_read(&id->owner) != vm->entity.fence_context) || + (job->vm_pd_addr != id->pd_gpu_addr) || + (updates && (!flushed || updates->context != flushed->context || + dma_fence_is_later(updates, flushed))) || + (!id->last_flush || (id->last_flush->context != fence_context && + !dma_fence_is_signaled(id->last_flush)))) { + needs_flush = true; + /* to prevent one context starved by another context */ + id->pd_gpu_addr = 0; + tmp = amdgpu_sync_peek_fence(&id->active, ring); + if (tmp) { + r = amdgpu_sync_fence(adev, sync, tmp, false); + return r; + } + } + + /* Good we can use this VMID. Remember this submission as + * user of the VMID. + */ + r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); + if (r) + goto out; + + if (updates && (!flushed || updates->context != flushed->context || + dma_fence_is_later(updates, flushed))) { + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + } + id->pd_gpu_addr = job->vm_pd_addr; + atomic64_set(&id->owner, vm->entity.fence_context); + job->vm_needs_flush = needs_flush; + if (needs_flush) { + dma_fence_put(id->last_flush); + id->last_flush = NULL; + } + job->vmid = id - id_mgr->ids; + trace_amdgpu_vm_grab_id(vm, ring, job); +out: + return r; +} + +/** + * amdgpu_vm_grab_id - allocate the next free VMID + * + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * @sync: sync object where we add dependencies + * @fence: fence protecting ID from reuse + * + * Allocate an id for the vm, adding fences to the sync obj as necessary. + */ +int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + struct amdgpu_sync *sync, struct dma_fence *fence, + struct amdgpu_job *job) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + uint64_t fence_context = adev->fence_context + ring->idx; + struct dma_fence *updates = sync->last_vm_update; + struct amdgpu_vmid *id, *idle; + struct dma_fence **fences; + unsigned i; + int r = 0; + + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) { + r = amdgpu_vmid_grab_reserved_locked(vm, ring, sync, fence, job); + mutex_unlock(&id_mgr->lock); + return r; + } + fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); + if (!fences) { + mutex_unlock(&id_mgr->lock); + return -ENOMEM; + } + /* Check if we have an idle VMID */ + i = 0; + list_for_each_entry(idle, &id_mgr->ids_lru, list) { + fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); + if (!fences[i]) + break; + ++i; + } + + /* If we can't find a idle VMID to use, wait till one becomes available */ + if (&idle->list == &id_mgr->ids_lru) { + u64 fence_context = adev->vm_manager.fence_context + ring->idx; + unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; + struct dma_fence_array *array; + unsigned j; + + for (j = 0; j < i; ++j) + dma_fence_get(fences[j]); + + array = dma_fence_array_create(i, fences, fence_context, + seqno, true); + if (!array) { + for (j = 0; j < i; ++j) + dma_fence_put(fences[j]); + kfree(fences); + r = -ENOMEM; + goto error; + } + + + r = amdgpu_sync_fence(ring->adev, sync, &array->base, false); + dma_fence_put(&array->base); + if (r) + goto error; + + mutex_unlock(&id_mgr->lock); + return 0; + + } + kfree(fences); + + job->vm_needs_flush = vm->use_cpu_for_update; + /* Check if we can use a VMID already assigned to this VM */ + list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { + struct dma_fence *flushed; + bool needs_flush = vm->use_cpu_for_update; + + /* Check all the prerequisites to using this VMID */ + if (amdgpu_vmid_had_gpu_reset(adev, id)) + continue; + + if (atomic64_read(&id->owner) != vm->entity.fence_context) + continue; + + if (job->vm_pd_addr != id->pd_gpu_addr) + continue; + + if (!id->last_flush || + (id->last_flush->context != fence_context && + !dma_fence_is_signaled(id->last_flush))) + needs_flush = true; + + flushed = id->flushed_updates; + if (updates && (!flushed || dma_fence_is_later(updates, flushed))) + needs_flush = true; + + /* Concurrent flushes are only possible starting with Vega10 */ + if (adev->asic_type < CHIP_VEGA10 && needs_flush) + continue; + + /* Good we can use this VMID. Remember this submission as + * user of the VMID. + */ + r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); + if (r) + goto error; + + if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + } + + if (needs_flush) + goto needs_flush; + else + goto no_flush_needed; + + }; + + /* Still no ID to use? Then use the idle one found earlier */ + id = idle; + + /* Remember this submission as user of the VMID */ + r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); + if (r) + goto error; + + id->pd_gpu_addr = job->vm_pd_addr; + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + atomic64_set(&id->owner, vm->entity.fence_context); + +needs_flush: + job->vm_needs_flush = true; + dma_fence_put(id->last_flush); + id->last_flush = NULL; + +no_flush_needed: + list_move_tail(&id->list, &id_mgr->ids_lru); + + job->vmid = id - id_mgr->ids; + trace_amdgpu_vm_grab_id(vm, ring, job); + +error: + mutex_unlock(&id_mgr->lock); + return r; +} + +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vmid_mgr *id_mgr; + struct amdgpu_vmid *idle; + int r = 0; + + id_mgr = &adev->vm_manager.id_mgr[vmhub]; + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) + goto unlock; + if (atomic_inc_return(&id_mgr->reserved_vmid_num) > + AMDGPU_VM_MAX_RESERVED_VMID) { + DRM_ERROR("Over limitation of reserved vmid\n"); + atomic_dec(&id_mgr->reserved_vmid_num); + r = -EINVAL; + goto unlock; + } + /* Select the first entry VMID */ + idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list); + list_del_init(&idle->list); + vm->reserved_vmid[vmhub] = idle; + mutex_unlock(&id_mgr->lock); + + return 0; +unlock: + mutex_unlock(&id_mgr->lock); + return r; +} + +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) { + list_add(&vm->reserved_vmid[vmhub]->list, + &id_mgr->ids_lru); + vm->reserved_vmid[vmhub] = NULL; + atomic_dec(&id_mgr->reserved_vmid_num); + } + mutex_unlock(&id_mgr->lock); +} + +/** + * amdgpu_vmid_reset - reset VMID to zero + * + * @adev: amdgpu device structure + * @vmid: vmid number to use + * + * Reset saved GDW, GWS and OA to force switch on next flush. + */ +void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, + unsigned vmid) +{ + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vmid *id = &id_mgr->ids[vmid]; + + atomic64_set(&id->owner, 0); + id->gds_base = 0; + id->gds_size = 0; + id->gws_base = 0; + id->gws_size = 0; + id->oa_base = 0; + id->oa_size = 0; +} + +/** + * amdgpu_vmid_reset_all - reset VMID to zero + * + * @adev: amdgpu device structure + * + * Reset VMID to force flush on next use + */ +void amdgpu_vmid_reset_all(struct amdgpu_device *adev) +{ + unsigned i, j; + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmid_mgr *id_mgr = + &adev->vm_manager.id_mgr[i]; + + for (j = 1; j < id_mgr->num_ids; ++j) + amdgpu_vmid_reset(adev, i, j); + } +} + +/** + * amdgpu_vmid_mgr_init - init the VMID manager + * + * @adev: amdgpu_device pointer + * + * Initialize the VM manager structures + */ +void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) +{ + unsigned i, j; + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmid_mgr *id_mgr = + &adev->vm_manager.id_mgr[i]; + + mutex_init(&id_mgr->lock); + INIT_LIST_HEAD(&id_mgr->ids_lru); + atomic_set(&id_mgr->reserved_vmid_num, 0); + + /* skip over VMID 0, since it is the system VM */ + for (j = 1; j < id_mgr->num_ids; ++j) { + amdgpu_vmid_reset(adev, i, j); + amdgpu_sync_create(&id_mgr->ids[i].active); + list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); + } + } + + adev->vm_manager.fence_context = + dma_fence_context_alloc(AMDGPU_MAX_RINGS); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + adev->vm_manager.seqno[i] = 0; +} + +/** + * amdgpu_vmid_mgr_fini - cleanup VM manager + * + * @adev: amdgpu_device pointer + * + * Cleanup the VM manager and free resources. + */ +void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) +{ + unsigned i, j; + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmid_mgr *id_mgr = + &adev->vm_manager.id_mgr[i]; + + mutex_destroy(&id_mgr->lock); + for (j = 0; j < AMDGPU_NUM_VMID; ++j) { + struct amdgpu_vmid *id = &id_mgr->ids[j]; + + amdgpu_sync_free(&id->active); + dma_fence_put(id->flushed_updates); + dma_fence_put(id->last_flush); + } + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h new file mode 100644 index 0000000000000000000000000000000000000000..ad931fa570b37d5030bfdf3db83fe27f4735601a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -0,0 +1,91 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_IDS_H__ +#define __AMDGPU_IDS_H__ + +#include +#include +#include +#include + +#include "amdgpu_sync.h" + +/* maximum number of VMIDs */ +#define AMDGPU_NUM_VMID 16 + +struct amdgpu_device; +struct amdgpu_vm; +struct amdgpu_ring; +struct amdgpu_sync; +struct amdgpu_job; + +struct amdgpu_vmid { + struct list_head list; + struct amdgpu_sync active; + struct dma_fence *last_flush; + atomic64_t owner; + + uint64_t pd_gpu_addr; + /* last flushed PD/PT update */ + struct dma_fence *flushed_updates; + + uint32_t current_gpu_reset_count; + + uint32_t gds_base; + uint32_t gds_size; + uint32_t gws_base; + uint32_t gws_size; + uint32_t oa_base; + uint32_t oa_size; +}; + +struct amdgpu_vmid_mgr { + struct mutex lock; + unsigned num_ids; + struct list_head ids_lru; + struct amdgpu_vmid ids[AMDGPU_NUM_VMID]; + atomic_t reserved_vmid_num; +}; + +int amdgpu_pasid_alloc(unsigned int bits); +void amdgpu_pasid_free(unsigned int pasid); + +bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, + struct amdgpu_vmid *id); +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub); +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub); +int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + struct amdgpu_sync *sync, struct dma_fence *fence, + struct amdgpu_job *job); +void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, + unsigned vmid); +void amdgpu_vmid_reset_all(struct amdgpu_device *adev); + +void amdgpu_vmid_mgr_init(struct amdgpu_device *adev); +void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index f5f27e4f0f7ff1ae788ebb9fa77ed3d3b11ee7ac..06373d44b3da3062d16c3f3a11fc4a7a39508774 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -92,15 +92,15 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, } return 0; } else { - r = amdgpu_wb_get(adev, &adev->irq.ih.wptr_offs); + r = amdgpu_device_wb_get(adev, &adev->irq.ih.wptr_offs); if (r) { dev_err(adev->dev, "(%d) ih wptr_offs wb alloc failed\n", r); return r; } - r = amdgpu_wb_get(adev, &adev->irq.ih.rptr_offs); + r = amdgpu_device_wb_get(adev, &adev->irq.ih.rptr_offs); if (r) { - amdgpu_wb_free(adev, adev->irq.ih.wptr_offs); + amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs); dev_err(adev->dev, "(%d) ih rptr_offs wb alloc failed\n", r); return r; } @@ -133,8 +133,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev) amdgpu_bo_free_kernel(&adev->irq.ih.ring_obj, &adev->irq.ih.gpu_addr, (void **)&adev->irq.ih.ring); - amdgpu_wb_free(adev, adev->irq.ih.wptr_offs); - amdgpu_wb_free(adev, adev->irq.ih.rptr_offs); + amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs); + amdgpu_device_wb_free(adev, adev->irq.ih.rptr_offs); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index ada89358e2207b566004495053b00e3ba68530b8..29cf10927a92ce6655064c9b028644473e69182b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -105,8 +105,8 @@ struct amdgpu_iv_entry { unsigned client_id; unsigned src_id; unsigned ring_id; - unsigned vm_id; - unsigned vm_id_src; + unsigned vmid; + unsigned vmid_src; uint64_t timestamp; unsigned timestamp_src; unsigned pas_id; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index c340774082ea225e7dc2106397c1630d1aeb180c..56bcd59c3399a0a912ea1fa8f869c105b8827327 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -88,7 +88,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) reset_work); if (!amdgpu_sriov_vf(adev)) - amdgpu_gpu_recover(adev, NULL); + amdgpu_device_gpu_recover(adev, NULL, false); } /* Disable *all* interrupts */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index bdc210ac74f800edb99629c872c9fdc1af658b14..2bd56760c7441fdc720d3a469fef424ae39741ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -28,7 +28,7 @@ #include "amdgpu.h" #include "amdgpu_trace.h" -static void amdgpu_job_timedout(struct amd_sched_job *s_job) +static void amdgpu_job_timedout(struct drm_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); @@ -37,7 +37,7 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job) atomic_read(&job->ring->fence_drv.last_seq), job->ring->fence_drv.sync_seq); - amdgpu_gpu_recover(job->adev, job); + amdgpu_device_gpu_recover(job->adev, job, false); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, @@ -96,7 +96,7 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) amdgpu_ib_free(job->adev, &job->ibs[i], f); } -static void amdgpu_job_free_cb(struct amd_sched_job *s_job) +static void amdgpu_job_free_cb(struct drm_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); @@ -118,7 +118,7 @@ void amdgpu_job_free(struct amdgpu_job *job) } int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, - struct amd_sched_entity *entity, void *owner, + struct drm_sched_entity *entity, void *owner, struct dma_fence **f) { int r; @@ -127,7 +127,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, if (!f) return -EINVAL; - r = amd_sched_job_init(&job->base, &ring->sched, entity, owner); + r = drm_sched_job_init(&job->base, &ring->sched, entity, owner); if (r) return r; @@ -136,13 +136,13 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); amdgpu_ring_priority_get(job->ring, job->base.s_priority); - amd_sched_entity_push_job(&job->base, entity); + drm_sched_entity_push_job(&job->base, entity); return 0; } -static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job, - struct amd_sched_entity *s_entity) +static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, + struct drm_sched_entity *s_entity) { struct amdgpu_job *job = to_amdgpu_job(sched_job); struct amdgpu_vm *vm = job->vm; @@ -151,19 +151,19 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job, struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync, &explicit); if (fence && explicit) { - if (amd_sched_dependency_optimized(fence, s_entity)) { + if (drm_sched_dependency_optimized(fence, s_entity)) { r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence, false); if (r) DRM_ERROR("Error adding fence to sync (%d)\n", r); } } - while (fence == NULL && vm && !job->vm_id) { + while (fence == NULL && vm && !job->vmid) { struct amdgpu_ring *ring = job->ring; - r = amdgpu_vm_grab_id(vm, ring, &job->sync, - &job->base.s_fence->finished, - job); + r = amdgpu_vmid_grab(vm, ring, &job->sync, + &job->base.s_fence->finished, + job); if (r) DRM_ERROR("Error getting VM ID (%d)\n", r); @@ -173,7 +173,7 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job, return fence; } -static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) +static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) { struct dma_fence *fence = NULL, *finished; struct amdgpu_device *adev; @@ -211,7 +211,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) return fence; } -const struct amd_sched_backend_ops amdgpu_sched_ops = { +const struct drm_sched_backend_ops amdgpu_sched_ops = { .dependency = amdgpu_job_dependency, .run_job = amdgpu_job_run, .timedout_job = amdgpu_job_timedout, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index dc0a8be98043ee3476d774b8d043cc83688c1d10..5c4c3e0d527be64386dcab0951727642f64d73db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -37,6 +37,18 @@ #include "amdgpu.h" #include "amdgpu_trace.h" +static bool amdgpu_need_backup(struct amdgpu_device *adev) +{ + if (adev->flags & AMD_IS_APU) + return false; + + if (amdgpu_gpu_recovery == 0 || + (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev))) + return false; + + return true; +} + static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); @@ -327,7 +339,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, uint64_t init_value, struct amdgpu_bo **bo_ptr) { - struct ttm_operation_ctx ctx = { !kernel, false }; + struct ttm_operation_ctx ctx = { + .interruptible = !kernel, + .no_wait_gpu = false, + .allow_reserved_eviction = true, + .resv = resv + }; struct amdgpu_bo *bo; enum ttm_bo_type type; unsigned long page_align; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 6f56ff606e43fcb58d90f0e7f984a85eb8471f77..01a996c6b802971af57df0bbd75ff007d8904f25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1,4 +1,6 @@ /* + * Copyright 2017 Advanced Micro Devices, Inc. + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation @@ -1276,16 +1278,16 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) /* XXX select vce level based on ring/task */ adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; mutex_unlock(&adev->pm.mutex); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_UNGATE); - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_UNGATE); amdgpu_pm_compute_clocks(adev); } else { - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_GATE); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_GATE); mutex_lock(&adev->pm.mutex); adev->pm.dpm.vce_active = false; mutex_unlock(&adev->pm.mutex); @@ -1582,7 +1584,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) struct drm_device *ddev = adev->ddev; u32 flags = 0; - amdgpu_get_clockgating_state(adev, &flags); + amdgpu_device_ip_get_clockgating_state(adev, &flags); seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags); amdgpu_parse_cg_state(m, flags); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index a98fbbb4739f65dad15dc717c1e66722770b5420..13044e66dcaf4e6ab0b79fab23aef8db987170db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -164,7 +164,7 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) * Release a request for executing at @priority */ void amdgpu_ring_priority_put(struct amdgpu_ring *ring, - enum amd_sched_priority priority) + enum drm_sched_priority priority) { int i; @@ -175,7 +175,7 @@ void amdgpu_ring_priority_put(struct amdgpu_ring *ring, return; /* no need to restore if the job is already at the lowest priority */ - if (priority == AMD_SCHED_PRIORITY_NORMAL) + if (priority == DRM_SCHED_PRIORITY_NORMAL) return; mutex_lock(&ring->priority_mutex); @@ -184,8 +184,8 @@ void amdgpu_ring_priority_put(struct amdgpu_ring *ring, goto out_unlock; /* decay priority to the next level with a job available */ - for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) { - if (i == AMD_SCHED_PRIORITY_NORMAL + for (i = priority; i >= DRM_SCHED_PRIORITY_MIN; i--) { + if (i == DRM_SCHED_PRIORITY_NORMAL || atomic_read(&ring->num_jobs[i])) { ring->priority = i; ring->funcs->set_priority(ring, i); @@ -206,7 +206,7 @@ void amdgpu_ring_priority_put(struct amdgpu_ring *ring, * Request a ring's priority to be raised to @priority (refcounted). */ void amdgpu_ring_priority_get(struct amdgpu_ring *ring, - enum amd_sched_priority priority) + enum drm_sched_priority priority) { if (!ring->funcs->set_priority) return; @@ -263,25 +263,25 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, return r; } - r = amdgpu_wb_get(adev, &ring->rptr_offs); + r = amdgpu_device_wb_get(adev, &ring->rptr_offs); if (r) { dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); return r; } - r = amdgpu_wb_get(adev, &ring->wptr_offs); + r = amdgpu_device_wb_get(adev, &ring->wptr_offs); if (r) { dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); return r; } - r = amdgpu_wb_get(adev, &ring->fence_offs); + r = amdgpu_device_wb_get(adev, &ring->fence_offs); if (r) { dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); return r; } - r = amdgpu_wb_get(adev, &ring->cond_exe_offs); + r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); if (r) { dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); return r; @@ -317,12 +317,12 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } ring->max_dw = max_dw; - ring->priority = AMD_SCHED_PRIORITY_NORMAL; + ring->priority = DRM_SCHED_PRIORITY_NORMAL; mutex_init(&ring->priority_mutex); INIT_LIST_HEAD(&ring->lru_list); amdgpu_ring_lru_touch(adev, ring); - for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i) + for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i) atomic_set(&ring->num_jobs[i], 0); if (amdgpu_debugfs_ring_init(adev, ring)) { @@ -348,11 +348,11 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) if (!(ring->adev) || !(ring->adev->rings[ring->idx])) return; - amdgpu_wb_free(ring->adev, ring->rptr_offs); - amdgpu_wb_free(ring->adev, ring->wptr_offs); + amdgpu_device_wb_free(ring->adev, ring->rptr_offs); + amdgpu_device_wb_free(ring->adev, ring->wptr_offs); - amdgpu_wb_free(ring->adev, ring->cond_exe_offs); - amdgpu_wb_free(ring->adev, ring->fence_offs); + amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); + amdgpu_device_wb_free(ring->adev, ring->fence_offs); amdgpu_bo_free_kernel(&ring->ring_obj, &ring->gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index a6b89e3932a5fcb7ebe1f0b7a2030940f57722cd..102dad3edf6a44be8462ea1d9303ad0be2fdd68b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -25,7 +25,7 @@ #define __AMDGPU_RING_H__ #include -#include "gpu_scheduler.h" +#include /* max number of rings */ #define AMDGPU_MAX_RINGS 18 @@ -121,11 +121,11 @@ struct amdgpu_ring_funcs { /* command emit functions */ void (*emit_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch); + unsigned vmid, bool ctx_switch); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); void (*emit_pipeline_sync)(struct amdgpu_ring *ring); - void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id, + void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); void (*emit_hdp_flush)(struct amdgpu_ring *ring); void (*emit_hdp_invalidate)(struct amdgpu_ring *ring); @@ -154,14 +154,14 @@ struct amdgpu_ring_funcs { void (*emit_tmz)(struct amdgpu_ring *ring, bool start); /* priority functions */ void (*set_priority) (struct amdgpu_ring *ring, - enum amd_sched_priority priority); + enum drm_sched_priority priority); }; struct amdgpu_ring { struct amdgpu_device *adev; const struct amdgpu_ring_funcs *funcs; struct amdgpu_fence_driver fence_drv; - struct amd_gpu_scheduler sched; + struct drm_gpu_scheduler sched; struct list_head lru_list; struct amdgpu_bo *ring_obj; @@ -186,6 +186,7 @@ struct amdgpu_ring { uint64_t eop_gpu_addr; u32 doorbell_index; bool use_doorbell; + bool use_pollmem; unsigned wptr_offs; unsigned fence_offs; uint64_t current_ctx; @@ -196,7 +197,7 @@ struct amdgpu_ring { unsigned vm_inv_eng; bool has_compute_vm_bug; - atomic_t num_jobs[AMD_SCHED_PRIORITY_MAX]; + atomic_t num_jobs[DRM_SCHED_PRIORITY_MAX]; struct mutex priority_mutex; /* protected by priority_mutex */ int priority; @@ -212,9 +213,9 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); void amdgpu_ring_priority_get(struct amdgpu_ring *ring, - enum amd_sched_priority priority); + enum drm_sched_priority priority); void amdgpu_ring_priority_put(struct amdgpu_ring *ring, - enum amd_sched_priority priority); + enum drm_sched_priority priority); int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 290cc3f9c433a3df7de41f1c1db228db30377ee8..86a0715d9431ded69bac338a38143d82789ad949 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -29,29 +29,29 @@ #include "amdgpu_vm.h" -enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) +enum drm_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) { switch (amdgpu_priority) { case AMDGPU_CTX_PRIORITY_VERY_HIGH: - return AMD_SCHED_PRIORITY_HIGH_HW; + return DRM_SCHED_PRIORITY_HIGH_HW; case AMDGPU_CTX_PRIORITY_HIGH: - return AMD_SCHED_PRIORITY_HIGH_SW; + return DRM_SCHED_PRIORITY_HIGH_SW; case AMDGPU_CTX_PRIORITY_NORMAL: - return AMD_SCHED_PRIORITY_NORMAL; + return DRM_SCHED_PRIORITY_NORMAL; case AMDGPU_CTX_PRIORITY_LOW: case AMDGPU_CTX_PRIORITY_VERY_LOW: - return AMD_SCHED_PRIORITY_LOW; + return DRM_SCHED_PRIORITY_LOW; case AMDGPU_CTX_PRIORITY_UNSET: - return AMD_SCHED_PRIORITY_UNSET; + return DRM_SCHED_PRIORITY_UNSET; default: WARN(1, "Invalid context priority %d\n", amdgpu_priority); - return AMD_SCHED_PRIORITY_INVALID; + return DRM_SCHED_PRIORITY_INVALID; } } static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, int fd, - enum amd_sched_priority priority) + enum drm_sched_priority priority) { struct file *filp = fcheck(fd); struct drm_file *file; @@ -86,11 +86,11 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_sched *args = data; struct amdgpu_device *adev = dev->dev_private; - enum amd_sched_priority priority; + enum drm_sched_priority priority; int r; priority = amdgpu_to_sched_priority(args->in.priority); - if (args->in.flags || priority == AMD_SCHED_PRIORITY_INVALID) + if (args->in.flags || priority == DRM_SCHED_PRIORITY_INVALID) return -EINVAL; switch (args->in.op) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h index b28c067d38223f2c78a3b523d628a234745f0c72..2a1a0c734bddb76e77a142f854e1e049973dde6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h @@ -27,7 +27,7 @@ #include -enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority); +enum drm_sched_priority amdgpu_to_sched_priority(int amdgpu_priority); int amdgpu_sched_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index ebe1ffbab0c1ddd91f70be72c37d24ef789f2919..df65c66dc956f7500810b5e3e42080650a484193 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -64,7 +64,7 @@ void amdgpu_sync_create(struct amdgpu_sync *sync) static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct dma_fence *f) { - struct amd_sched_fence *s_fence = to_amd_sched_fence(f); + struct drm_sched_fence *s_fence = to_drm_sched_fence(f); if (s_fence) { struct amdgpu_ring *ring; @@ -85,7 +85,7 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, */ static void *amdgpu_sync_get_owner(struct dma_fence *f) { - struct amd_sched_fence *s_fence = to_amd_sched_fence(f); + struct drm_sched_fence *s_fence = to_drm_sched_fence(f); if (s_fence) return s_fence->owner; @@ -120,7 +120,7 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep, * Tries to add the fence to an existing hash entry. Returns true when an entry * was found, false otherwise. */ -static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) +static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, bool explicit) { struct amdgpu_sync_entry *e; @@ -129,6 +129,10 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) continue; amdgpu_sync_keep_later(&e->fence, f); + + /* Preserve eplicit flag to not loose pipe line sync */ + e->explicit |= explicit; + return true; } return false; @@ -148,12 +152,11 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, if (!f) return 0; - if (amdgpu_sync_same_dev(adev, f) && amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM) amdgpu_sync_keep_later(&sync->last_vm_update, f); - if (amdgpu_sync_add_later(sync, f)) + if (amdgpu_sync_add_later(sync, f, explicit)) return 0; e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); @@ -245,7 +248,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, hash_for_each_safe(sync->fences, i, tmp, e, node) { struct dma_fence *f = e->fence; - struct amd_sched_fence *s_fence = to_amd_sched_fence(f); + struct drm_sched_fence *s_fence = to_drm_sched_fence(f); if (dma_fence_is_signaled(f)) { hash_del(&e->node); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index f337c316ec2c656a823230b355250929715db327..cace7a93fc943beab80622d98a2ba5a01ec71884 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -1,4 +1,26 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + #if !defined(_AMDGPU_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define _AMDGPU_TRACE_H_ @@ -60,8 +82,8 @@ TRACE_EVENT(amdgpu_iv, __field(unsigned, client_id) __field(unsigned, src_id) __field(unsigned, ring_id) - __field(unsigned, vm_id) - __field(unsigned, vm_id_src) + __field(unsigned, vmid) + __field(unsigned, vmid_src) __field(uint64_t, timestamp) __field(unsigned, timestamp_src) __field(unsigned, pas_id) @@ -71,8 +93,8 @@ TRACE_EVENT(amdgpu_iv, __entry->client_id = iv->client_id; __entry->src_id = iv->src_id; __entry->ring_id = iv->ring_id; - __entry->vm_id = iv->vm_id; - __entry->vm_id_src = iv->vm_id_src; + __entry->vmid = iv->vmid; + __entry->vmid_src = iv->vmid_src; __entry->timestamp = iv->timestamp; __entry->timestamp_src = iv->timestamp_src; __entry->pas_id = iv->pas_id; @@ -81,9 +103,9 @@ TRACE_EVENT(amdgpu_iv, __entry->src_data[2] = iv->src_data[2]; __entry->src_data[3] = iv->src_data[3]; ), - TP_printk("client_id:%u src_id:%u ring:%u vm_id:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n", + TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n", __entry->client_id, __entry->src_id, - __entry->ring_id, __entry->vm_id, + __entry->ring_id, __entry->vmid, __entry->timestamp, __entry->pas_id, __entry->src_data[0], __entry->src_data[1], __entry->src_data[2], __entry->src_data[3]) @@ -197,7 +219,7 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_STRUCT__entry( __field(struct amdgpu_vm *, vm) __field(u32, ring) - __field(u32, vm_id) + __field(u32, vmid) __field(u32, vm_hub) __field(u64, pd_addr) __field(u32, needs_flush) @@ -206,13 +228,13 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_fast_assign( __entry->vm = vm; __entry->ring = ring->idx; - __entry->vm_id = job->vm_id; + __entry->vmid = job->vmid; __entry->vm_hub = ring->funcs->vmhub, __entry->pd_addr = job->vm_pd_addr; __entry->needs_flush = job->vm_needs_flush; ), TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", - __entry->vm, __entry->ring, __entry->vm_id, + __entry->vm, __entry->ring, __entry->vmid, __entry->vm_hub, __entry->pd_addr, __entry->needs_flush) ); @@ -335,24 +357,24 @@ TRACE_EVENT(amdgpu_vm_copy_ptes, ); TRACE_EVENT(amdgpu_vm_flush, - TP_PROTO(struct amdgpu_ring *ring, unsigned vm_id, + TP_PROTO(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr), - TP_ARGS(ring, vm_id, pd_addr), + TP_ARGS(ring, vmid, pd_addr), TP_STRUCT__entry( __field(u32, ring) - __field(u32, vm_id) + __field(u32, vmid) __field(u32, vm_hub) __field(u64, pd_addr) ), TP_fast_assign( __entry->ring = ring->idx; - __entry->vm_id = vm_id; + __entry->vmid = vmid; __entry->vm_hub = ring->funcs->vmhub; __entry->pd_addr = pd_addr; ), TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx", - __entry->ring, __entry->vm_id, + __entry->ring, __entry->vmid, __entry->vm_hub,__entry->pd_addr) ); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 952e0bf3bc8426c54fa531c121709ce60b9eea17..e4bb435e614b86cacd4f264526423d73e3d32ace 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -76,7 +76,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) { struct drm_global_reference *global_ref; struct amdgpu_ring *ring; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; int r; adev->mman.mem_global_referenced = false; @@ -108,8 +108,8 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) mutex_init(&adev->mman.gtt_window_lock); ring = adev->mman.buffer_funcs_ring; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; - r = amd_sched_entity_init(&ring->sched, &adev->mman.entity, + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, rq, amdgpu_sched_jobs, NULL); if (r) { DRM_ERROR("Failed setting up TTM BO move run queue.\n"); @@ -131,7 +131,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) { if (adev->mman.mem_global_referenced) { - amd_sched_entity_fini(adev->mman.entity.sched, + drm_sched_entity_fini(adev->mman.entity.sched, &adev->mman.entity); mutex_destroy(&adev->mman.gtt_window_lock); drm_global_item_unref(&adev->mman.bo_global_ref.ref); @@ -497,7 +497,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, goto out_cleanup; } - r = ttm_tt_bind(bo->ttm, &tmp_mem); + r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx); if (unlikely(r)) { goto out_cleanup; } @@ -505,7 +505,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, ctx->interruptible, ctx->no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, ctx, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; @@ -536,7 +536,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, ctx->interruptible, ctx->no_wait_gpu, &tmp_mem); + r = ttm_bo_move_ttm(bo, ctx, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } @@ -597,8 +597,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (r) { memcpy: - r = ttm_bo_move_memcpy(bo, ctx->interruptible, - ctx->no_wait_gpu, new_mem); + r = ttm_bo_move_memcpy(bo, ctx, new_mem); if (r) { return r; } @@ -991,7 +990,8 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev, return >t->ttm.ttm; } -static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) +static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, + struct ttm_operation_ctx *ctx) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -1019,11 +1019,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) #ifdef CONFIG_SWIOTLB if (swiotlb_nr_tbl()) { - return ttm_dma_populate(>t->ttm, adev->dev); + return ttm_dma_populate(>t->ttm, adev->dev, ctx); } #endif - return ttm_populate_and_map_pages(adev->dev, >t->ttm); + return ttm_populate_and_map_pages(adev->dev, >t->ttm, ctx); } static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) @@ -1270,6 +1270,101 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .access_memory = &amdgpu_ttm_access_memory }; +/* + * Firmware Reservation functions + */ +/** + * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram + * + * @adev: amdgpu_device pointer + * + * free fw reserved vram if it has been reserved. + */ +static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo, + NULL, &adev->fw_vram_usage.va); +} + +/** + * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw + * + * @adev: amdgpu_device pointer + * + * create bo vram reservation from fw. + */ +static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) +{ + struct ttm_operation_ctx ctx = { false, false }; + int r = 0; + int i; + u64 vram_size = adev->mc.visible_vram_size; + u64 offset = adev->fw_vram_usage.start_offset; + u64 size = adev->fw_vram_usage.size; + struct amdgpu_bo *bo; + + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; + + if (adev->fw_vram_usage.size > 0 && + adev->fw_vram_usage.size <= vram_size) { + + r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, + PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0, + &adev->fw_vram_usage.reserved_bo); + if (r) + goto error_create; + + r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); + if (r) + goto error_reserve; + + /* remove the original mem node and create a new one at the + * request position + */ + bo = adev->fw_vram_usage.reserved_bo; + offset = ALIGN(offset, PAGE_SIZE); + for (i = 0; i < bo->placement.num_placement; ++i) { + bo->placements[i].fpfn = offset >> PAGE_SHIFT; + bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; + } + + ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); + r = ttm_bo_mem_space(&bo->tbo, &bo->placement, + &bo->tbo.mem, &ctx); + if (r) + goto error_pin; + + r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, + AMDGPU_GEM_DOMAIN_VRAM, + adev->fw_vram_usage.start_offset, + (adev->fw_vram_usage.start_offset + + adev->fw_vram_usage.size), NULL); + if (r) + goto error_pin; + r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, + &adev->fw_vram_usage.va); + if (r) + goto error_kmap; + + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); + } + return r; + +error_kmap: + amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); +error_pin: + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); +error_reserve: + amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); +error_create: + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; + return r; +} + int amdgpu_ttm_init(struct amdgpu_device *adev) { uint64_t gtt_size; @@ -1312,7 +1407,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) *The reserved vram for firmware must be pinned to the specified *place on the VRAM, so reserve it early. */ - r = amdgpu_fw_reserve_vram_init(adev); + r = amdgpu_ttm_fw_reserve_vram_init(adev); if (r) { return r; } @@ -1330,9 +1425,11 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) struct sysinfo si; si_meminfo(&si); - gtt_size = max(AMDGPU_DEFAULT_GTT_SIZE_MB << 20, - (uint64_t)si.totalram * si.mem_unit * 3/4); - } else + gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->mc.mc_vram_size), + ((uint64_t)si.totalram * si.mem_unit * 3/4)); + } + else gtt_size = (uint64_t)amdgpu_gtt_size << 20; r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); if (r) { @@ -1396,7 +1493,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_debugfs_fini(adev); amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); - amdgpu_fw_reserve_vram_fini(adev); + amdgpu_ttm_fw_reserve_vram_fini(adev); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 4f9433e6140622b5aff6569c8bf8c4941cbd375a..167856f6080fa432f23417f6e6117593d129ce59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -25,7 +25,7 @@ #define __AMDGPU_TTM_H__ #include "amdgpu.h" -#include "gpu_scheduler.h" +#include #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) @@ -55,7 +55,7 @@ struct amdgpu_mman { struct mutex gtt_window_lock; /* Scheduler entity for buffer moves */ - struct amd_sched_entity entity; + struct drm_sched_entity entity; }; struct amdgpu_copy_mem { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 2f2a9e17fdb4df1ad5456c66aa1060f679c9246f..b2eae86bf906abe6ed0bd7e89f67f2d9acbfa809 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -116,7 +116,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work); int amdgpu_uvd_sw_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; @@ -230,8 +230,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) } ring = &adev->uvd.ring; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity, + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity, rq, amdgpu_sched_jobs, NULL); if (r != 0) { DRM_ERROR("Failed setting up UVD run queue.\n"); @@ -244,7 +244,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) } /* from uvd v5.0 HW addressing capacity increased to 64 bits */ - if (!amdgpu_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) + if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) adev->uvd.address_64_bit = true; switch (adev->asic_type) { @@ -272,7 +272,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) int i; kfree(adev->uvd.saved_bo); - amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); + drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo, &adev->uvd.gpu_addr, @@ -297,6 +297,8 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (adev->uvd.vcpu_bo == NULL) return 0; + cancel_delayed_work_sync(&adev->uvd.idle_work); + for (i = 0; i < adev->uvd.max_handles; ++i) if (atomic_read(&adev->uvd.handles[i])) break; @@ -304,8 +306,6 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (i == AMDGPU_MAX_UVD_HANDLES) return 0; - cancel_delayed_work_sync(&adev->uvd.idle_work); - size = amdgpu_bo_size(adev->uvd.vcpu_bo); ptr = adev->uvd.cpu_addr; @@ -346,6 +346,8 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) ptr += le32_to_cpu(hdr->ucode_size_bytes); } memset_io(ptr, 0, size); + /* to restore uvd fence seq */ + amdgpu_fence_driver_force_completion(&adev->uvd.ring); } return 0; @@ -1153,10 +1155,10 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) } else { amdgpu_asic_set_uvd_clocks(adev, 0, 0); /* shutdown the UVD block */ - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_GATE); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_CG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_GATE); } } else { schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT); @@ -1176,10 +1178,10 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) amdgpu_dpm_enable_uvd(adev, true); } else { amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_CG_STATE_UNGATE); - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_UNGATE); } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 845eea993f75ca5b567f2ac514b24e95e37b9a64..32ea20b99e53a3818417945877f8e1df56ef14fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -51,8 +51,8 @@ struct amdgpu_uvd { struct amdgpu_irq_src irq; bool address_64_bit; bool use_ctx_buf; - struct amd_sched_entity entity; - struct amd_sched_entity entity_enc; + struct drm_sched_entity entity; + struct drm_sched_entity entity_enc; uint32_t srbm_soft_reset; unsigned num_enc_rings; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index ba6d846b08ff62e33a9f5b8ad0235d26e6c05a47..d274ae53553059a5970ffd28d5e5e3fcc7398143 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -85,7 +85,7 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work); int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) { struct amdgpu_ring *ring; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; const char *fw_name; const struct common_firmware_header *hdr; unsigned ucode_version, version_major, version_minor, binary_id; @@ -174,8 +174,8 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) } ring = &adev->vce.ring[0]; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->vce.entity, + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->vce.entity, rq, amdgpu_sched_jobs, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCE run queue.\n"); @@ -207,7 +207,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; - amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); + drm_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, (void **)&adev->vce.cpu_addr); @@ -311,10 +311,10 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work) amdgpu_dpm_enable_vce(adev, false); } else { amdgpu_asic_set_vce_clocks(adev, 0, 0); - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_GATE); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_GATE); } } else { schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT); @@ -343,10 +343,10 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring) amdgpu_dpm_enable_vce(adev, true); } else { amdgpu_asic_set_vce_clocks(adev, 53300, 40000); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_UNGATE); - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_UNGATE); } } @@ -585,8 +585,8 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx, for (i = 0; i < bo->placement.num_placement; ++i) { bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn); - bo->placements[i].lpfn = bo->placements[i].fpfn ? - min(bo->placements[i].fpfn, lpfn) : lpfn; + bo->placements[i].lpfn = bo->placements[i].lpfn ? + min(bo->placements[i].lpfn, lpfn) : lpfn; } return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); } @@ -991,7 +991,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx) * */ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { amdgpu_ring_write(ring, VCE_CMD_IB); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 5ce54cde472d8dbfb7c9b9be556359ffe04527a2..0fd378ae92c3faee3efd8d2f45a7561afecb8274 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -46,7 +46,7 @@ struct amdgpu_vce { struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS]; struct amdgpu_irq_src irq; unsigned harvest_config; - struct amd_sched_entity entity; + struct drm_sched_entity entity; uint32_t srbm_soft_reset; unsigned num_rings; }; @@ -63,7 +63,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch); + unsigned vmid, bool ctx_switch); void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags); int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index d7ba048c2f80c136e4a2f998985966e78e872abd..837962118dbc5d56de555791d99b8d6215520331 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -35,7 +35,6 @@ #include "soc15d.h" #include "soc15_common.h" -#include "soc15ip.h" #include "vcn/vcn_1_0_offset.h" /* 1 second timeout */ @@ -51,7 +50,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work); int amdgpu_vcn_sw_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; @@ -104,8 +103,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } ring = &adev->vcn.ring_dec; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, rq, amdgpu_sched_jobs, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCN dec run queue.\n"); @@ -113,8 +112,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } ring = &adev->vcn.ring_enc[0]; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, rq, amdgpu_sched_jobs, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCN enc run queue.\n"); @@ -130,9 +129,9 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) kfree(adev->vcn.saved_bo); - amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec); + drm_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec); - amd_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc); + drm_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc); amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, &adev->vcn.gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index d50ba06578542e5a7df60dbc963a75edd1d1b672..2fd7db891689f514c0b98f72eb2d905311b31ffc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -56,8 +56,8 @@ struct amdgpu_vcn { struct amdgpu_ring ring_dec; struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; struct amdgpu_irq_src irq; - struct amd_sched_entity entity_dec; - struct amd_sched_entity entity_enc; + struct drm_sched_entity entity_dec; + struct drm_sched_entity entity_enc; unsigned num_enc_rings; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3ecdbdfb04dd72899c64f9f22c8b9f2dfb893b74..6fc16eecf2dce5fc448afaa1681fbb086259ea35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -33,52 +33,6 @@ #include "amdgpu.h" #include "amdgpu_trace.h" -/* - * PASID manager - * - * PASIDs are global address space identifiers that can be shared - * between the GPU, an IOMMU and the driver. VMs on different devices - * may use the same PASID if they share the same address - * space. Therefore PASIDs are allocated using a global IDA. VMs are - * looked up from the PASID per amdgpu_device. - */ -static DEFINE_IDA(amdgpu_vm_pasid_ida); - -/** - * amdgpu_vm_alloc_pasid - Allocate a PASID - * @bits: Maximum width of the PASID in bits, must be at least 1 - * - * Allocates a PASID of the given width while keeping smaller PASIDs - * available if possible. - * - * Returns a positive integer on success. Returns %-EINVAL if bits==0. - * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on - * memory allocation failure. - */ -int amdgpu_vm_alloc_pasid(unsigned int bits) -{ - int pasid = -EINVAL; - - for (bits = min(bits, 31U); bits > 0; bits--) { - pasid = ida_simple_get(&amdgpu_vm_pasid_ida, - 1U << (bits - 1), 1U << bits, - GFP_KERNEL); - if (pasid != -ENOSPC) - break; - } - - return pasid; -} - -/** - * amdgpu_vm_free_pasid - Free a PASID - * @pasid: PASID to free - */ -void amdgpu_vm_free_pasid(unsigned int pasid) -{ - ida_simple_remove(&amdgpu_vm_pasid_ida, pasid); -} - /* * GPUVM * GPUVM is similar to the legacy gart on older asics, however @@ -148,12 +102,23 @@ struct amdgpu_prt_cb { static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, unsigned level) { - if (level != adev->vm_manager.num_level) - return 9 * (adev->vm_manager.num_level - level - 1) + + unsigned shift = 0xff; + + switch (level) { + case AMDGPU_VM_PDB2: + case AMDGPU_VM_PDB1: + case AMDGPU_VM_PDB0: + shift = 9 * (AMDGPU_VM_PDB0 - level) + adev->vm_manager.block_size; - else - /* For the page tables on the leaves */ - return 0; + break; + case AMDGPU_VM_PTB: + shift = 0; + break; + default: + dev_err(adev->dev, "the level%d isn't supported.\n", level); + } + + return shift; } /** @@ -166,12 +131,13 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, unsigned level) { - unsigned shift = amdgpu_vm_level_shift(adev, 0); + unsigned shift = amdgpu_vm_level_shift(adev, + adev->vm_manager.root_level); - if (level == 0) + if (level == adev->vm_manager.root_level) /* For the root directory */ return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift; - else if (level != adev->vm_manager.num_level) + else if (level != AMDGPU_VM_PTB) /* Everything in between */ return 512; else @@ -329,9 +295,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, to >= amdgpu_vm_num_entries(adev, level)) return -EINVAL; - if (to > parent->last_entry_used) - parent->last_entry_used = to; - ++level; saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); @@ -346,7 +309,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (vm->pte_support_ats) { init_value = AMDGPU_PTE_DEFAULT_ATC; - if (level != adev->vm_manager.num_level - 1) + if (level != AMDGPU_VM_PTB) init_value |= AMDGPU_PDE_PTE; } @@ -386,10 +349,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, spin_lock(&vm->status_lock); list_add(&entry->base.vm_status, &vm->relocated); spin_unlock(&vm->status_lock); - entry->addr = 0; } - if (level < adev->vm_manager.num_level) { + if (level < AMDGPU_VM_PTB) { uint64_t sub_saddr = (pt_idx == from) ? saddr : 0; uint64_t sub_eaddr = (pt_idx == to) ? eaddr : ((1 << shift) - 1); @@ -435,287 +397,8 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; - return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0); -} - -/** - * amdgpu_vm_had_gpu_reset - check if reset occured since last use - * - * @adev: amdgpu_device pointer - * @id: VMID structure - * - * Check if GPU reset occured since last use of the VMID. - */ -static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev, - struct amdgpu_vm_id *id) -{ - return id->current_gpu_reset_count != - atomic_read(&adev->gpu_reset_counter); -} - -static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub) -{ - return !!vm->reserved_vmid[vmhub]; -} - -/* idr_mgr->lock must be held */ -static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, - struct amdgpu_ring *ring, - struct amdgpu_sync *sync, - struct dma_fence *fence, - struct amdgpu_job *job) -{ - struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; - uint64_t fence_context = adev->fence_context + ring->idx; - struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub]; - struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - struct dma_fence *updates = sync->last_vm_update; - int r = 0; - struct dma_fence *flushed, *tmp; - bool needs_flush = vm->use_cpu_for_update; - - flushed = id->flushed_updates; - if ((amdgpu_vm_had_gpu_reset(adev, id)) || - (atomic64_read(&id->owner) != vm->client_id) || - (job->vm_pd_addr != id->pd_gpu_addr) || - (updates && (!flushed || updates->context != flushed->context || - dma_fence_is_later(updates, flushed))) || - (!id->last_flush || (id->last_flush->context != fence_context && - !dma_fence_is_signaled(id->last_flush)))) { - needs_flush = true; - /* to prevent one context starved by another context */ - id->pd_gpu_addr = 0; - tmp = amdgpu_sync_peek_fence(&id->active, ring); - if (tmp) { - r = amdgpu_sync_fence(adev, sync, tmp, false); - return r; - } - } - - /* Good we can use this VMID. Remember this submission as - * user of the VMID. - */ - r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); - if (r) - goto out; - - if (updates && (!flushed || updates->context != flushed->context || - dma_fence_is_later(updates, flushed))) { - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); - } - id->pd_gpu_addr = job->vm_pd_addr; - atomic64_set(&id->owner, vm->client_id); - job->vm_needs_flush = needs_flush; - if (needs_flush) { - dma_fence_put(id->last_flush); - id->last_flush = NULL; - } - job->vm_id = id - id_mgr->ids; - trace_amdgpu_vm_grab_id(vm, ring, job); -out: - return r; -} - -/** - * amdgpu_vm_grab_id - allocate the next free VMID - * - * @vm: vm to allocate id for - * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse - * - * Allocate an id for the vm, adding fences to the sync obj as necessary. - */ -int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct dma_fence *fence, - struct amdgpu_job *job) -{ - struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; - struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - uint64_t fence_context = adev->fence_context + ring->idx; - struct dma_fence *updates = sync->last_vm_update; - struct amdgpu_vm_id *id, *idle; - struct dma_fence **fences; - unsigned i; - int r = 0; - - mutex_lock(&id_mgr->lock); - if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) { - r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job); - mutex_unlock(&id_mgr->lock); - return r; - } - fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); - if (!fences) { - mutex_unlock(&id_mgr->lock); - return -ENOMEM; - } - /* Check if we have an idle VMID */ - i = 0; - list_for_each_entry(idle, &id_mgr->ids_lru, list) { - fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); - if (!fences[i]) - break; - ++i; - } - - /* If we can't find a idle VMID to use, wait till one becomes available */ - if (&idle->list == &id_mgr->ids_lru) { - u64 fence_context = adev->vm_manager.fence_context + ring->idx; - unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; - struct dma_fence_array *array; - unsigned j; - - for (j = 0; j < i; ++j) - dma_fence_get(fences[j]); - - array = dma_fence_array_create(i, fences, fence_context, - seqno, true); - if (!array) { - for (j = 0; j < i; ++j) - dma_fence_put(fences[j]); - kfree(fences); - r = -ENOMEM; - goto error; - } - - - r = amdgpu_sync_fence(ring->adev, sync, &array->base, false); - dma_fence_put(&array->base); - if (r) - goto error; - - mutex_unlock(&id_mgr->lock); - return 0; - - } - kfree(fences); - - job->vm_needs_flush = vm->use_cpu_for_update; - /* Check if we can use a VMID already assigned to this VM */ - list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { - struct dma_fence *flushed; - bool needs_flush = vm->use_cpu_for_update; - - /* Check all the prerequisites to using this VMID */ - if (amdgpu_vm_had_gpu_reset(adev, id)) - continue; - - if (atomic64_read(&id->owner) != vm->client_id) - continue; - - if (job->vm_pd_addr != id->pd_gpu_addr) - continue; - - if (!id->last_flush || - (id->last_flush->context != fence_context && - !dma_fence_is_signaled(id->last_flush))) - needs_flush = true; - - flushed = id->flushed_updates; - if (updates && (!flushed || dma_fence_is_later(updates, flushed))) - needs_flush = true; - - /* Concurrent flushes are only possible starting with Vega10 */ - if (adev->asic_type < CHIP_VEGA10 && needs_flush) - continue; - - /* Good we can use this VMID. Remember this submission as - * user of the VMID. - */ - r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); - if (r) - goto error; - - if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); - } - - if (needs_flush) - goto needs_flush; - else - goto no_flush_needed; - - }; - - /* Still no ID to use? Then use the idle one found earlier */ - id = idle; - - /* Remember this submission as user of the VMID */ - r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); - if (r) - goto error; - - id->pd_gpu_addr = job->vm_pd_addr; - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); - atomic64_set(&id->owner, vm->client_id); - -needs_flush: - job->vm_needs_flush = true; - dma_fence_put(id->last_flush); - id->last_flush = NULL; - -no_flush_needed: - list_move_tail(&id->list, &id_mgr->ids_lru); - - job->vm_id = id - id_mgr->ids; - trace_amdgpu_vm_grab_id(vm, ring, job); - -error: - mutex_unlock(&id_mgr->lock); - return r; -} - -static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - unsigned vmhub) -{ - struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - - mutex_lock(&id_mgr->lock); - if (vm->reserved_vmid[vmhub]) { - list_add(&vm->reserved_vmid[vmhub]->list, - &id_mgr->ids_lru); - vm->reserved_vmid[vmhub] = NULL; - atomic_dec(&id_mgr->reserved_vmid_num); - } - mutex_unlock(&id_mgr->lock); -} - -static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - unsigned vmhub) -{ - struct amdgpu_vm_id_manager *id_mgr; - struct amdgpu_vm_id *idle; - int r = 0; - - id_mgr = &adev->vm_manager.id_mgr[vmhub]; - mutex_lock(&id_mgr->lock); - if (vm->reserved_vmid[vmhub]) - goto unlock; - if (atomic_inc_return(&id_mgr->reserved_vmid_num) > - AMDGPU_VM_MAX_RESERVED_VMID) { - DRM_ERROR("Over limitation of reserved vmid\n"); - atomic_dec(&id_mgr->reserved_vmid_num); - r = -EINVAL; - goto unlock; - } - /* Select the first entry VMID */ - idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list); - list_del_init(&idle->list); - vm->reserved_vmid[vmhub] = idle; - mutex_unlock(&id_mgr->lock); - - return 0; -unlock: - mutex_unlock(&id_mgr->lock); - return r; + return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, + adev->vm_manager.root_level); } /** @@ -732,7 +415,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) has_compute_vm_bug = false; - ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); if (ip_block) { /* Compute has a VM bug for GFX version < 7. Compute has a VM bug for GFX 8 MEC firmware version < 673.*/ @@ -758,14 +441,14 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; - struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - struct amdgpu_vm_id *id; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vmid *id; bool gds_switch_needed; bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; - if (job->vm_id == 0) + if (job->vmid == 0) return false; - id = &id_mgr->ids[job->vm_id]; + id = &id_mgr->ids[job->vmid]; gds_switch_needed = ring->funcs->emit_gds_switch && ( id->gds_base != job->gds_base || id->gds_size != job->gds_size || @@ -774,7 +457,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, id->oa_base != job->oa_base || id->oa_size != job->oa_size); - if (amdgpu_vm_had_gpu_reset(adev, id)) + if (amdgpu_vmid_had_gpu_reset(adev, id)) return true; return vm_flush_needed || gds_switch_needed; @@ -789,7 +472,7 @@ static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) * amdgpu_vm_flush - hardware flush the vm * * @ring: ring to use for flush - * @vm_id: vmid number to use + * @vmid: vmid number to use * @pd_addr: address of the page directory * * Emit a VM flush when it is necessary. @@ -798,8 +481,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; - struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id]; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; bool gds_switch_needed = ring->funcs->emit_gds_switch && ( id->gds_base != job->gds_base || id->gds_size != job->gds_size || @@ -811,7 +494,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ unsigned patch_offset = 0; int r; - if (amdgpu_vm_had_gpu_reset(adev, id)) { + if (amdgpu_vmid_had_gpu_reset(adev, id)) { gds_switch_needed = true; vm_flush_needed = true; } @@ -828,8 +511,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ if (ring->funcs->emit_vm_flush && vm_flush_needed) { struct dma_fence *fence; - trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr); - amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr); + trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); + amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); r = amdgpu_fence_emit(ring, &fence); if (r) @@ -849,7 +532,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ id->gws_size = job->gws_size; id->oa_base = job->oa_base; id->oa_size = job->oa_size; - amdgpu_ring_emit_gds_switch(ring, job->vm_id, job->gds_base, + amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, job->gds_size, job->gws_base, job->gws_size, job->oa_base, job->oa_size); @@ -866,49 +549,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ return 0; } -/** - * amdgpu_vm_reset_id - reset VMID to zero - * - * @adev: amdgpu device structure - * @vm_id: vmid number to use - * - * Reset saved GDW, GWS and OA to force switch on next flush. - */ -void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, - unsigned vmid) -{ - struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - struct amdgpu_vm_id *id = &id_mgr->ids[vmid]; - - atomic64_set(&id->owner, 0); - id->gds_base = 0; - id->gds_size = 0; - id->gws_base = 0; - id->gws_size = 0; - id->oa_base = 0; - id->oa_size = 0; -} - -/** - * amdgpu_vm_reset_all_id - reset VMID to zero - * - * @adev: amdgpu device structure - * - * Reset VMID to force flush on next use - */ -void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev) -{ - unsigned i, j; - - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vm_id_manager *id_mgr = - &adev->vm_manager.id_mgr[i]; - - for (j = 1; j < id_mgr->num_ids; ++j) - amdgpu_vm_reset_id(adev, i, j); - } -} - /** * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo * @@ -1060,162 +700,52 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, } /* - * amdgpu_vm_update_level - update a single level in the hierarchy + * amdgpu_vm_update_pde - update a single level in the hierarchy * - * @adev: amdgpu_device pointer + * @param: parameters for the update * @vm: requested vm * @parent: parent directory + * @entry: entry to update * - * Makes sure all entries in @parent are up to date. - * Returns 0 for success, error for failure. + * Makes sure the requested entry in parent is up to date. */ -static int amdgpu_vm_update_level(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent) +static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt *parent, + struct amdgpu_vm_pt *entry) { - struct amdgpu_bo *shadow; - struct amdgpu_ring *ring = NULL; + struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo; uint64_t pd_addr, shadow_addr = 0; - uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; - unsigned count = 0, pt_idx, ndw = 0; - struct amdgpu_job *job; - struct amdgpu_pte_update_params params; - struct dma_fence *fence = NULL; - uint32_t incr; - - int r; - - if (!parent->entries) - return 0; + uint64_t pde, pt, flags; + unsigned level; - memset(¶ms, 0, sizeof(params)); - params.adev = adev; - shadow = parent->base.bo->shadow; + /* Don't update huge pages here */ + if (entry->huge) + return; if (vm->use_cpu_for_update) { pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); - if (unlikely(r)) - return r; - - params.func = amdgpu_vm_cpu_set_ptes; } else { - ring = container_of(vm->entity.sched, struct amdgpu_ring, - sched); - - /* padding, etc. */ - ndw = 64; - - /* assume the worst case */ - ndw += parent->last_entry_used * 6; - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); - - if (shadow) { + shadow = parent->base.bo->shadow; + if (shadow) shadow_addr = amdgpu_bo_gpu_offset(shadow); - ndw *= 2; - } else { - shadow_addr = 0; - } - - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); - if (r) - return r; - - params.ib = &job->ibs[0]; - params.func = amdgpu_vm_do_set_ptes; - } - - - /* walk over the address space and update the directory */ - for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { - struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; - struct amdgpu_bo *bo = entry->base.bo; - uint64_t pde, pt; - - if (bo == NULL) - continue; - - spin_lock(&vm->status_lock); - list_del_init(&entry->base.vm_status); - spin_unlock(&vm->status_lock); - - pt = amdgpu_bo_gpu_offset(bo); - pt = amdgpu_gart_get_vm_pde(adev, pt); - /* Don't update huge pages here */ - if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) || - parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID)) - continue; - - parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; - - pde = pd_addr + pt_idx * 8; - incr = amdgpu_bo_size(bo); - if (((last_pde + 8 * count) != pde) || - ((last_pt + incr * count) != pt) || - (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { - - if (count) { - if (shadow) - params.func(¶ms, - last_shadow, - last_pt, count, - incr, - AMDGPU_PTE_VALID); - - params.func(¶ms, last_pde, - last_pt, count, incr, - AMDGPU_PTE_VALID); - } - - count = 1; - last_pde = pde; - last_shadow = shadow_addr + pt_idx * 8; - last_pt = pt; - } else { - ++count; - } } - if (count) { - if (vm->root.base.bo->shadow) - params.func(¶ms, last_shadow, last_pt, - count, incr, AMDGPU_PTE_VALID); + for (level = 0, pbo = parent->base.bo->parent; pbo; ++level) + pbo = pbo->parent; - params.func(¶ms, last_pde, last_pt, - count, incr, AMDGPU_PTE_VALID); + level += params->adev->vm_manager.root_level; + pt = amdgpu_bo_gpu_offset(bo); + flags = AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags); + if (shadow) { + pde = shadow_addr + (entry - parent->entries) * 8; + params->func(params, pde, pt, 1, 0, flags); } - if (!vm->use_cpu_for_update) { - if (params.ib->length_dw == 0) { - amdgpu_job_free(job); - } else { - amdgpu_ring_pad_ib(ring, params.ib); - amdgpu_sync_resv(adev, &job->sync, - parent->base.bo->tbo.resv, - AMDGPU_FENCE_OWNER_VM, false); - if (shadow) - amdgpu_sync_resv(adev, &job->sync, - shadow->tbo.resv, - AMDGPU_FENCE_OWNER_VM, false); - - WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, ring, &vm->entity, - AMDGPU_FENCE_OWNER_VM, &fence); - if (r) - goto error_free; - - amdgpu_bo_fence(parent->base.bo, fence, true); - dma_fence_put(vm->last_update); - vm->last_update = fence; - } - } - - return 0; - -error_free: - amdgpu_job_free(job); - return r; + pde = pd_addr + (entry - parent->entries) * 8; + params->func(params, pde, pt, 1, 0, flags); } /* @@ -1225,27 +755,29 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, * * Mark all PD level as invalid after an error. */ -static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent) +static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt *parent, + unsigned level) { - unsigned pt_idx; + unsigned pt_idx, num_entries; /* * Recurse into the subdirectories. This recursion is harmless because * we only have a maximum of 5 layers. */ - for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { + num_entries = amdgpu_vm_num_entries(adev, level); + for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) { struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; if (!entry->base.bo) continue; - entry->addr = ~0ULL; spin_lock(&vm->status_lock); if (list_empty(&entry->base.vm_status)) list_add(&entry->base.vm_status, &vm->relocated); spin_unlock(&vm->status_lock); - amdgpu_vm_invalidate_level(vm, entry); + amdgpu_vm_invalidate_level(adev, vm, entry, level + 1); } } @@ -1261,38 +793,63 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm) { + struct amdgpu_pte_update_params params; + struct amdgpu_job *job; + unsigned ndw = 0; int r = 0; + if (list_empty(&vm->relocated)) + return 0; + +restart: + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + + if (vm->use_cpu_for_update) { + r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); + if (unlikely(r)) + return r; + + params.func = amdgpu_vm_cpu_set_ptes; + } else { + ndw = 512 * 8; + r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); + if (r) + return r; + + params.ib = &job->ibs[0]; + params.func = amdgpu_vm_do_set_ptes; + } + spin_lock(&vm->status_lock); while (!list_empty(&vm->relocated)) { - struct amdgpu_vm_bo_base *bo_base; + struct amdgpu_vm_bo_base *bo_base, *parent; + struct amdgpu_vm_pt *pt, *entry; struct amdgpu_bo *bo; bo_base = list_first_entry(&vm->relocated, struct amdgpu_vm_bo_base, vm_status); + list_del_init(&bo_base->vm_status); spin_unlock(&vm->status_lock); bo = bo_base->bo->parent; - if (bo) { - struct amdgpu_vm_bo_base *parent; - struct amdgpu_vm_pt *pt; - - parent = list_first_entry(&bo->va, - struct amdgpu_vm_bo_base, - bo_list); - pt = container_of(parent, struct amdgpu_vm_pt, base); - - r = amdgpu_vm_update_level(adev, vm, pt); - if (r) { - amdgpu_vm_invalidate_level(vm, &vm->root); - return r; - } + if (!bo) { spin_lock(&vm->status_lock); - } else { - spin_lock(&vm->status_lock); - list_del_init(&bo_base->vm_status); + continue; } + + parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base, + bo_list); + pt = container_of(parent, struct amdgpu_vm_pt, base); + entry = container_of(bo_base, struct amdgpu_vm_pt, base); + + amdgpu_vm_update_pde(¶ms, vm, pt, entry); + + spin_lock(&vm->status_lock); + if (!vm->use_cpu_for_update && + (ndw - params.ib->length_dw) < 32) + break; } spin_unlock(&vm->status_lock); @@ -1300,8 +857,44 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, /* Flush HDP */ mb(); amdgpu_gart_flush_gpu_tlb(adev, 0); + } else if (params.ib->length_dw == 0) { + amdgpu_job_free(job); + } else { + struct amdgpu_bo *root = vm->root.base.bo; + struct amdgpu_ring *ring; + struct dma_fence *fence; + + ring = container_of(vm->entity.sched, struct amdgpu_ring, + sched); + + amdgpu_ring_pad_ib(ring, params.ib); + amdgpu_sync_resv(adev, &job->sync, root->tbo.resv, + AMDGPU_FENCE_OWNER_VM, false); + if (root->shadow) + amdgpu_sync_resv(adev, &job->sync, + root->shadow->tbo.resv, + AMDGPU_FENCE_OWNER_VM, false); + + WARN_ON(params.ib->length_dw > ndw); + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_VM, &fence); + if (r) + goto error; + + amdgpu_bo_fence(root, fence, true); + dma_fence_put(vm->last_update); + vm->last_update = fence; } + if (!list_empty(&vm->relocated)) + goto restart; + + return 0; + +error: + amdgpu_vm_invalidate_level(adev, vm, &vm->root, + adev->vm_manager.root_level); + amdgpu_job_free(job); return r; } @@ -1319,19 +912,19 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, struct amdgpu_vm_pt **entry, struct amdgpu_vm_pt **parent) { - unsigned level = 0; + unsigned level = p->adev->vm_manager.root_level; *parent = NULL; *entry = &p->vm->root; while ((*entry)->entries) { - unsigned idx = addr >> amdgpu_vm_level_shift(p->adev, level++); + unsigned shift = amdgpu_vm_level_shift(p->adev, level++); - idx %= amdgpu_bo_size((*entry)->base.bo) / 8; *parent = *entry; - *entry = &(*entry)->entries[idx]; + *entry = &(*entry)->entries[addr >> shift]; + addr &= (1ULL << shift) - 1; } - if (level != p->adev->vm_manager.num_level) + if (level != AMDGPU_VM_PTB) *entry = NULL; } @@ -1353,56 +946,42 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, unsigned nptes, uint64_t dst, uint64_t flags) { - bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); uint64_t pd_addr, pde; /* In the case of a mixed PT the PDE must point to it*/ - if (p->adev->asic_type < CHIP_VEGA10 || - nptes != AMDGPU_VM_PTE_COUNT(p->adev) || - p->src || - !(flags & AMDGPU_PTE_VALID)) { - - dst = amdgpu_bo_gpu_offset(entry->base.bo); - dst = amdgpu_gart_get_vm_pde(p->adev, dst); - flags = AMDGPU_PTE_VALID; - } else { + if (p->adev->asic_type >= CHIP_VEGA10 && !p->src && + nptes == AMDGPU_VM_PTE_COUNT(p->adev)) { /* Set the huge page flag to stop scanning at this PDE */ flags |= AMDGPU_PDE_PTE; } - if (entry->addr == (dst | flags)) + if (!(flags & AMDGPU_PDE_PTE)) { + if (entry->huge) { + /* Add the entry to the relocated list to update it. */ + entry->huge = false; + spin_lock(&p->vm->status_lock); + list_move(&entry->base.vm_status, &p->vm->relocated); + spin_unlock(&p->vm->status_lock); + } return; + } - entry->addr = (dst | flags); - - if (use_cpu_update) { - /* In case a huge page is replaced with a system - * memory mapping, p->pages_addr != NULL and - * amdgpu_vm_cpu_set_ptes would try to translate dst - * through amdgpu_vm_map_gart. But dst is already a - * GPU address (of the page table). Disable - * amdgpu_vm_map_gart temporarily. - */ - dma_addr_t *tmp; - - tmp = p->pages_addr; - p->pages_addr = NULL; + entry->huge = true; + amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0, + &dst, &flags); + if (p->func == amdgpu_vm_cpu_set_ptes) { pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - pde = pd_addr + (entry - parent->entries) * 8; - amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); - - p->pages_addr = tmp; } else { if (parent->base.bo->shadow) { pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow); pde = pd_addr + (entry - parent->entries) * 8; - amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); + p->func(p, pde, dst, 1, 0, flags); } pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); - pde = pd_addr + (entry - parent->entries) * 8; - amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); } + pde = pd_addr + (entry - parent->entries) * 8; + p->func(p, pde, dst, 1, 0, flags); } /** @@ -1447,7 +1026,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, amdgpu_vm_handle_huge_pages(params, entry, parent, nptes, dst, flags); /* We don't need to update PTEs for huge pages */ - if (entry->addr & AMDGPU_PDE_PTE) + if (entry->huge) continue; pt = entry->base.bo; @@ -1606,14 +1185,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, * * The second command is for the shadow pagetables. */ - ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2; + if (vm->root.base.bo->shadow) + ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2; + else + ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); /* padding, etc. */ ndw = 64; - /* one PDE write for each huge page */ - ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6; - if (pages_addr) { /* copy commands needed */ ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; @@ -1688,7 +1267,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, error_free: amdgpu_job_free(job); - amdgpu_vm_invalidate_level(vm, &vm->root); return r; } @@ -2099,18 +1677,31 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, spin_lock(&vm->status_lock); while (!list_empty(&vm->moved)) { struct amdgpu_bo_va *bo_va; + struct reservation_object *resv; bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, base.vm_status); spin_unlock(&vm->status_lock); + resv = bo_va->base.bo->tbo.resv; + /* Per VM BOs never need to bo cleared in the page tables */ - clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv; + if (resv == vm->root.base.bo->tbo.resv) + clear = false; + /* Try to reserve the BO to avoid clearing its ptes */ + else if (!amdgpu_vm_debug && reservation_object_trylock(resv)) + clear = false; + /* Somebody else is using the BO right now */ + else + clear = true; r = amdgpu_vm_bo_update(adev, bo_va, clear); if (r) return r; + if (!clear && resv != vm->root.base.bo->tbo.resv) + reservation_object_unlock(resv); + spin_lock(&vm->status_lock); } spin_unlock(&vm->status_lock); @@ -2150,8 +1741,26 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); - if (bo) - list_add_tail(&bo_va->base.bo_list, &bo->va); + if (!bo) + return bo_va; + + list_add_tail(&bo_va->base.bo_list, &bo->va); + + if (bo->tbo.resv != vm->root.base.bo->tbo.resv) + return bo_va; + + if (bo->preferred_domains & + amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) + return bo_va; + + /* + * We checked all the prerequisites, but it looks like this per VM BO + * is currently evicted. add the BO to the evicted list to make sure it + * is validated on next VM use to avoid fault. + * */ + spin_lock(&vm->status_lock); + list_move_tail(&bo_va->base.vm_status, &vm->evicted); + spin_unlock(&vm->status_lock); return bo_va; } @@ -2604,7 +2213,19 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, tmp >>= amdgpu_vm_block_size - 9; tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1; adev->vm_manager.num_level = min(max_level, (unsigned)tmp); - + switch (adev->vm_manager.num_level) { + case 3: + adev->vm_manager.root_level = AMDGPU_VM_PDB2; + break; + case 2: + adev->vm_manager.root_level = AMDGPU_VM_PDB1; + break; + case 1: + adev->vm_manager.root_level = AMDGPU_VM_PDB0; + break; + default: + dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n"); + } /* block size depends on vm size and hw setup*/ if (amdgpu_vm_block_size != -1) adev->vm_manager.block_size = @@ -2643,13 +2264,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, AMDGPU_VM_PTE_COUNT(adev) * 8); unsigned ring_instance; struct amdgpu_ring *ring; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; int r, i; u64 flags; uint64_t init_pde_value = 0; vm->va = RB_ROOT_CACHED; - vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; spin_lock_init(&vm->status_lock); @@ -2663,8 +2283,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring); ring_instance %= adev->vm_manager.vm_pte_num_rings; ring = adev->vm_manager.vm_pte_rings[ring_instance]; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; - r = amd_sched_entity_init(&ring->sched, &vm->entity, + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + r = drm_sched_entity_init(&ring->sched, &vm->entity, rq, amdgpu_sched_jobs, NULL); if (r) return r; @@ -2698,7 +2318,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_SHADOW); - r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, + r = amdgpu_bo_create(adev, + amdgpu_vm_bo_size(adev, adev->vm_manager.root_level), + align, true, AMDGPU_GEM_DOMAIN_VRAM, flags, NULL, NULL, init_pde_value, &vm->root.base.bo); @@ -2744,7 +2366,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->root.base.bo = NULL; error_free_sched_entity: - amd_sched_entity_fini(&ring->sched, &vm->entity); + drm_sched_entity_fini(&ring->sched, &vm->entity); return r; } @@ -2752,26 +2374,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, /** * amdgpu_vm_free_levels - free PD/PT levels * - * @level: PD/PT starting level to free + * @adev: amdgpu device structure + * @parent: PD/PT starting level to free + * @level: level of parent structure * * Free the page directory or page table level and all sub levels. */ -static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) +static void amdgpu_vm_free_levels(struct amdgpu_device *adev, + struct amdgpu_vm_pt *parent, + unsigned level) { - unsigned i; + unsigned i, num_entries = amdgpu_vm_num_entries(adev, level); - if (level->base.bo) { - list_del(&level->base.bo_list); - list_del(&level->base.vm_status); - amdgpu_bo_unref(&level->base.bo->shadow); - amdgpu_bo_unref(&level->base.bo); + if (parent->base.bo) { + list_del(&parent->base.bo_list); + list_del(&parent->base.vm_status); + amdgpu_bo_unref(&parent->base.bo->shadow); + amdgpu_bo_unref(&parent->base.bo); } - if (level->entries) - for (i = 0; i <= level->last_entry_used; i++) - amdgpu_vm_free_levels(&level->entries[i]); + if (parent->entries) + for (i = 0; i < num_entries; i++) + amdgpu_vm_free_levels(adev, &parent->entries[i], + level + 1); - kvfree(level->entries); + kvfree(parent->entries); } /** @@ -2803,7 +2430,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } - amd_sched_entity_fini(vm->entity.sched, &vm->entity); + drm_sched_entity_fini(vm->entity.sched, &vm->entity); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); @@ -2829,13 +2456,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) { dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); } else { - amdgpu_vm_free_levels(&vm->root); + amdgpu_vm_free_levels(adev, &vm->root, + adev->vm_manager.root_level); amdgpu_bo_unreserve(root); } amdgpu_bo_unref(&root); dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) - amdgpu_vm_free_reserved_vmid(adev, vm, i); + amdgpu_vmid_free_reserved(adev, vm, i); } /** @@ -2854,17 +2482,21 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, spin_lock(&adev->vm_manager.pasid_lock); vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - spin_unlock(&adev->vm_manager.pasid_lock); - if (!vm) + if (!vm) { /* VM not found, can't track fault credit */ + spin_unlock(&adev->vm_manager.pasid_lock); return true; + } /* No lock needed. only accessed by IRQ handler */ - if (!vm->fault_credit) + if (!vm->fault_credit) { /* Too many faults in this VM */ + spin_unlock(&adev->vm_manager.pasid_lock); return false; + } vm->fault_credit--; + spin_unlock(&adev->vm_manager.pasid_lock); return true; } @@ -2877,23 +2509,9 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, */ void amdgpu_vm_manager_init(struct amdgpu_device *adev) { - unsigned i, j; - - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vm_id_manager *id_mgr = - &adev->vm_manager.id_mgr[i]; + unsigned i; - mutex_init(&id_mgr->lock); - INIT_LIST_HEAD(&id_mgr->ids_lru); - atomic_set(&id_mgr->reserved_vmid_num, 0); - - /* skip over VMID 0, since it is the system VM */ - for (j = 1; j < id_mgr->num_ids; ++j) { - amdgpu_vm_reset_id(adev, i, j); - amdgpu_sync_create(&id_mgr->ids[i].active); - list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); - } - } + amdgpu_vmid_mgr_init(adev); adev->vm_manager.fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); @@ -2901,7 +2519,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) adev->vm_manager.seqno[i] = 0; atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); - atomic64_set(&adev->vm_manager.client_counter, 0); spin_lock_init(&adev->vm_manager.prt_lock); atomic_set(&adev->vm_manager.num_prt_users, 0); @@ -2934,24 +2551,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) */ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) { - unsigned i, j; - WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr)); idr_destroy(&adev->vm_manager.pasid_idr); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vm_id_manager *id_mgr = - &adev->vm_manager.id_mgr[i]; - - mutex_destroy(&id_mgr->lock); - for (j = 0; j < AMDGPU_NUM_VM; ++j) { - struct amdgpu_vm_id *id = &id_mgr->ids[j]; - - amdgpu_sync_free(&id->active); - dma_fence_put(id->flushed_updates); - dma_fence_put(id->last_flush); - } - } + amdgpu_vmid_mgr_fini(adev); } int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) @@ -2964,13 +2567,12 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: /* current, we only have requirement to reserve vmid from gfxhub */ - r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm, - AMDGPU_GFXHUB); + r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); if (r) return r; break; case AMDGPU_VM_OP_UNRESERVE_VMID: - amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB); + amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 43ea131dd411b8626816e7df608e203a5b929fe2..21a80f1bb2b9cd9779d475db133b9d70a77ebcec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -24,12 +24,14 @@ #ifndef __AMDGPU_VM_H__ #define __AMDGPU_VM_H__ -#include #include +#include +#include +#include -#include "gpu_scheduler.h" #include "amdgpu_sync.h" #include "amdgpu_ring.h" +#include "amdgpu_ids.h" struct amdgpu_bo_va; struct amdgpu_job; @@ -39,9 +41,6 @@ struct amdgpu_bo_list_entry; * GPUVM handling */ -/* maximum number of VMIDs */ -#define AMDGPU_NUM_VM 16 - /* Maximum number of PTEs the hardware can write with one command */ #define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF @@ -69,6 +68,12 @@ struct amdgpu_bo_list_entry; /* PDE is handled as PTE for VEGA10 */ #define AMDGPU_PDE_PTE (1ULL << 54) +/* PTE is handled as PDE for VEGA10 (Translate Further) */ +#define AMDGPU_PTE_TF (1ULL << 56) + +/* PDE Block Fragment Size for VEGA10 */ +#define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59) + /* VEGA10 only */ #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) @@ -119,6 +124,16 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) +/* VMPT level enumerate, and the hiberachy is: + * PDB2->PDB1->PDB0->PTB + */ +enum amdgpu_vm_level { + AMDGPU_VM_PDB2, + AMDGPU_VM_PDB1, + AMDGPU_VM_PDB0, + AMDGPU_VM_PTB +}; + /* base structure for tracking BO usage in a VM */ struct amdgpu_vm_bo_base { /* constant after initialization */ @@ -137,11 +152,10 @@ struct amdgpu_vm_bo_base { struct amdgpu_vm_pt { struct amdgpu_vm_bo_base base; - uint64_t addr; + bool huge; /* array of page tables, one for each directory entry */ struct amdgpu_vm_pt *entries; - unsigned last_entry_used; }; #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr)) @@ -175,13 +189,11 @@ struct amdgpu_vm { spinlock_t freed_lock; /* Scheduler entity for page table updates */ - struct amd_sched_entity entity; + struct drm_sched_entity entity; - /* client id and PASID (TODO: replace client_id with PASID) */ - u64 client_id; unsigned int pasid; /* dedicated to vm */ - struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; + struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ bool use_cpu_for_update; @@ -196,37 +208,9 @@ struct amdgpu_vm { unsigned int fault_credit; }; -struct amdgpu_vm_id { - struct list_head list; - struct amdgpu_sync active; - struct dma_fence *last_flush; - atomic64_t owner; - - uint64_t pd_gpu_addr; - /* last flushed PD/PT update */ - struct dma_fence *flushed_updates; - - uint32_t current_gpu_reset_count; - - uint32_t gds_base; - uint32_t gds_size; - uint32_t gws_base; - uint32_t gws_size; - uint32_t oa_base; - uint32_t oa_size; -}; - -struct amdgpu_vm_id_manager { - struct mutex lock; - unsigned num_ids; - struct list_head ids_lru; - struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; - atomic_t reserved_vmid_num; -}; - struct amdgpu_vm_manager { /* Handling of VMIDs */ - struct amdgpu_vm_id_manager id_mgr[AMDGPU_MAX_VMHUBS]; + struct amdgpu_vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS]; /* Handling of VM fences */ u64 fence_context; @@ -236,6 +220,7 @@ struct amdgpu_vm_manager { uint32_t num_level; uint32_t block_size; uint32_t fragment_size; + enum amdgpu_vm_level root_level; /* vram base address for page table entry */ u64 vram_base_offset; /* vm pte handling */ @@ -243,8 +228,6 @@ struct amdgpu_vm_manager { struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; unsigned vm_pte_num_rings; atomic_t vm_pte_next_ring; - /* client id counter */ - atomic64_t client_counter; /* partial resident texture handling */ spinlock_t prt_lock; @@ -263,8 +246,6 @@ struct amdgpu_vm_manager { spinlock_t pasid_lock; }; -int amdgpu_vm_alloc_pasid(unsigned int bits); -void amdgpu_vm_free_pasid(unsigned int pasid); void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -282,13 +263,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t saddr, uint64_t size); -int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct dma_fence *fence, - struct amdgpu_job *job); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); -void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, - unsigned vmid); -void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev); int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index f11c0aacf19f5a34652da90522a6d80264c09cc0..a0943aa8d1d370fd43bb4f8f111659663e0326fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -891,12 +891,12 @@ static void ci_dpm_powergate_uvd(void *handle, bool gate) if (gate) { /* stop the UVD block */ - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); ci_update_uvd_dpm(adev, gate); } else { - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_UNGATE); ci_update_uvd_dpm(adev, gate); } } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 8ba056a2a5da9ede328b0bc5541d0b745b445df6..8e59e65efd448891fc9d84a7db6e1669c2b892eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -755,74 +755,74 @@ static void cik_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_BONAIRE: - amdgpu_program_register_sequence(adev, - bonaire_mgcg_cgcg_init, - ARRAY_SIZE(bonaire_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - bonaire_golden_registers, - ARRAY_SIZE(bonaire_golden_registers)); - amdgpu_program_register_sequence(adev, - bonaire_golden_common_registers, - ARRAY_SIZE(bonaire_golden_common_registers)); - amdgpu_program_register_sequence(adev, - bonaire_golden_spm_registers, - ARRAY_SIZE(bonaire_golden_spm_registers)); + amdgpu_device_program_register_sequence(adev, + bonaire_mgcg_cgcg_init, + ARRAY_SIZE(bonaire_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + bonaire_golden_registers, + ARRAY_SIZE(bonaire_golden_registers)); + amdgpu_device_program_register_sequence(adev, + bonaire_golden_common_registers, + ARRAY_SIZE(bonaire_golden_common_registers)); + amdgpu_device_program_register_sequence(adev, + bonaire_golden_spm_registers, + ARRAY_SIZE(bonaire_golden_spm_registers)); break; case CHIP_KABINI: - amdgpu_program_register_sequence(adev, - kalindi_mgcg_cgcg_init, - ARRAY_SIZE(kalindi_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - kalindi_golden_registers, - ARRAY_SIZE(kalindi_golden_registers)); - amdgpu_program_register_sequence(adev, - kalindi_golden_common_registers, - ARRAY_SIZE(kalindi_golden_common_registers)); - amdgpu_program_register_sequence(adev, - kalindi_golden_spm_registers, - ARRAY_SIZE(kalindi_golden_spm_registers)); + amdgpu_device_program_register_sequence(adev, + kalindi_mgcg_cgcg_init, + ARRAY_SIZE(kalindi_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + kalindi_golden_registers, + ARRAY_SIZE(kalindi_golden_registers)); + amdgpu_device_program_register_sequence(adev, + kalindi_golden_common_registers, + ARRAY_SIZE(kalindi_golden_common_registers)); + amdgpu_device_program_register_sequence(adev, + kalindi_golden_spm_registers, + ARRAY_SIZE(kalindi_golden_spm_registers)); break; case CHIP_MULLINS: - amdgpu_program_register_sequence(adev, - kalindi_mgcg_cgcg_init, - ARRAY_SIZE(kalindi_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - godavari_golden_registers, - ARRAY_SIZE(godavari_golden_registers)); - amdgpu_program_register_sequence(adev, - kalindi_golden_common_registers, - ARRAY_SIZE(kalindi_golden_common_registers)); - amdgpu_program_register_sequence(adev, - kalindi_golden_spm_registers, - ARRAY_SIZE(kalindi_golden_spm_registers)); + amdgpu_device_program_register_sequence(adev, + kalindi_mgcg_cgcg_init, + ARRAY_SIZE(kalindi_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + godavari_golden_registers, + ARRAY_SIZE(godavari_golden_registers)); + amdgpu_device_program_register_sequence(adev, + kalindi_golden_common_registers, + ARRAY_SIZE(kalindi_golden_common_registers)); + amdgpu_device_program_register_sequence(adev, + kalindi_golden_spm_registers, + ARRAY_SIZE(kalindi_golden_spm_registers)); break; case CHIP_KAVERI: - amdgpu_program_register_sequence(adev, - spectre_mgcg_cgcg_init, - ARRAY_SIZE(spectre_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - spectre_golden_registers, - ARRAY_SIZE(spectre_golden_registers)); - amdgpu_program_register_sequence(adev, - spectre_golden_common_registers, - ARRAY_SIZE(spectre_golden_common_registers)); - amdgpu_program_register_sequence(adev, - spectre_golden_spm_registers, - ARRAY_SIZE(spectre_golden_spm_registers)); + amdgpu_device_program_register_sequence(adev, + spectre_mgcg_cgcg_init, + ARRAY_SIZE(spectre_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + spectre_golden_registers, + ARRAY_SIZE(spectre_golden_registers)); + amdgpu_device_program_register_sequence(adev, + spectre_golden_common_registers, + ARRAY_SIZE(spectre_golden_common_registers)); + amdgpu_device_program_register_sequence(adev, + spectre_golden_spm_registers, + ARRAY_SIZE(spectre_golden_spm_registers)); break; case CHIP_HAWAII: - amdgpu_program_register_sequence(adev, - hawaii_mgcg_cgcg_init, - ARRAY_SIZE(hawaii_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - hawaii_golden_registers, - ARRAY_SIZE(hawaii_golden_registers)); - amdgpu_program_register_sequence(adev, - hawaii_golden_common_registers, - ARRAY_SIZE(hawaii_golden_common_registers)); - amdgpu_program_register_sequence(adev, - hawaii_golden_spm_registers, - ARRAY_SIZE(hawaii_golden_spm_registers)); + amdgpu_device_program_register_sequence(adev, + hawaii_mgcg_cgcg_init, + ARRAY_SIZE(hawaii_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + hawaii_golden_registers, + ARRAY_SIZE(hawaii_golden_registers)); + amdgpu_device_program_register_sequence(adev, + hawaii_golden_common_registers, + ARRAY_SIZE(hawaii_golden_common_registers)); + amdgpu_device_program_register_sequence(adev, + hawaii_golden_spm_registers, + ARRAY_SIZE(hawaii_golden_spm_registers)); break; default: break; @@ -1246,7 +1246,7 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev) /* disable BM */ pci_clear_master(adev->pdev); /* reset */ - amdgpu_pci_config_reset(adev); + amdgpu_device_pci_config_reset(adev); udelay(100); @@ -1866,7 +1866,7 @@ static int cik_common_early_init(void *handle) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - amdgpu_get_pcie_info(adev); + amdgpu_device_get_pcie_info(adev); return 0; } @@ -1974,77 +1974,77 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_BONAIRE: - amdgpu_ip_block_add(adev, &cik_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block); - amdgpu_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &cik_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); + amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v8_2_ip_block); - amdgpu_ip_block_add(adev, &gfx_v7_2_ip_block); - amdgpu_ip_block_add(adev, &cik_sdma_ip_block); - amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block); - amdgpu_ip_block_add(adev, &vce_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v8_2_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block); + amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block); break; case CHIP_HAWAII: - amdgpu_ip_block_add(adev, &cik_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block); - amdgpu_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &cik_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); + amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v8_5_ip_block); - amdgpu_ip_block_add(adev, &gfx_v7_3_ip_block); - amdgpu_ip_block_add(adev, &cik_sdma_ip_block); - amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block); - amdgpu_ip_block_add(adev, &vce_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v8_5_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block); + amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block); break; case CHIP_KAVERI: - amdgpu_ip_block_add(adev, &cik_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block); - amdgpu_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &cik_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); + amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v8_1_ip_block); - amdgpu_ip_block_add(adev, &gfx_v7_1_ip_block); - amdgpu_ip_block_add(adev, &cik_sdma_ip_block); - amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block); - amdgpu_ip_block_add(adev, &vce_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v8_1_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block); + amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block); break; case CHIP_KABINI: case CHIP_MULLINS: - amdgpu_ip_block_add(adev, &cik_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block); - amdgpu_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &cik_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); + amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v8_3_ip_block); - amdgpu_ip_block_add(adev, &gfx_v7_2_ip_block); - amdgpu_ip_block_add(adev, &cik_sdma_ip_block); - amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block); - amdgpu_ip_block_add(adev, &vce_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v8_3_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block); + amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block); break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index a870b354e3f7babc8e24f36929f9792d1cb65f36..d5a05c19708fc9f4fdde684d7d7a19d282ffa45c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -280,7 +280,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev, entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; - entry->vm_id = (dw[2] >> 8) & 0xff; + entry->vmid = (dw[2] >> 8) & 0xff; entry->pas_id = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index ed26dcbc4f795c188dd9b008f3d2280df463276a..6e8278e689b18ef35bd67428573cc8dd15c5dcbe 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -221,9 +221,9 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) */ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 extra_bits = vm_id & 0xf; + u32 extra_bits = vmid & 0xf; /* IB packet must end on a 8 DW boundary */ cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8); @@ -626,7 +626,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; @@ -639,7 +639,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); @@ -663,7 +663,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) ring->idx, tmp); r = -EINVAL; } - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -686,7 +686,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) u64 gpu_addr; long r; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; @@ -735,7 +735,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -880,23 +880,23 @@ static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * using sDMA (CIK). */ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) | SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - if (vm_id < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); + if (vmid < 8) { + amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); + amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); } amdgpu_ring_write(ring, pd_addr >> 12); /* flush TLB */ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 6a9e38a3d2a0bbee4265da23f889b4ebf3710b5c..cee6e8a3ad9c9227c81dcfe2f568daf508686219 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -562,7 +562,7 @@ #define PRIVATE_BASE(x) ((x) << 0) /* scratch */ #define SHARED_BASE(x) ((x) << 16) /* LDS */ -#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200 +#define KFD_CIK_SDMA_QUEUE_OFFSET (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL) /* valid for both DEFAULT_MTYPE and APE1_MTYPE */ enum { diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h index 003a131bad474db5d28584735c0e399b1def20d6..567a904804bc694caed08e3072128078fb4cee4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h @@ -48,7 +48,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_1[] = 0x00000000, // DB_STENCIL_WRITE_BASE 0x00000000, // DB_STENCIL_WRITE_BASE_HI 0x00000000, // DB_DFSM_CONTROL - 0x00000000, // DB_RENDER_FILTER + 0, // HOLE 0x00000000, // DB_Z_INFO2 0x00000000, // DB_STENCIL_INFO2 0, // HOLE @@ -259,8 +259,8 @@ static const unsigned int gfx9_SECT_CONTEXT_def_2[] = 0x00000000, // PA_SC_RIGHT_VERT_GRID 0x00000000, // PA_SC_LEFT_VERT_GRID 0x00000000, // PA_SC_HORIZ_GRID - 0x00000000, // PA_SC_FOV_WINDOW_LR - 0x00000000, // PA_SC_FOV_WINDOW_TB + 0, // HOLE + 0, // HOLE 0, // HOLE 0, // HOLE 0, // HOLE @@ -701,7 +701,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_7[] = { 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP 0x00000000, // VGT_DRAW_PAYLOAD_CNTL - 0x00000000, // VGT_INDEX_PAYLOAD_CNTL + 0, // HOLE 0x00000000, // VGT_INSTANCE_STEP_RATE_0 0x00000000, // VGT_INSTANCE_STEP_RATE_1 0, // HOLE diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index fa61d649bb44a5c89126bd7cd680ad88c39ce52f..f576e9cbbc61c613551770fee2f4159dba9f4f8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -259,7 +259,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; - entry->vm_id = (dw[2] >> 8) & 0xff; + entry->vmid = (dw[2] >> 8) & 0xff; entry->pas_id = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index a397111c2cedb9d34aa2d4de292af9bbe19e3499..f34bc68aadfb119380e5f6ff1a279a996d4f453a 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -145,20 +145,20 @@ static void dce_v10_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_FIJI: - amdgpu_program_register_sequence(adev, - fiji_mgcg_cgcg_init, - ARRAY_SIZE(fiji_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_fiji_a10, - ARRAY_SIZE(golden_settings_fiji_a10)); + amdgpu_device_program_register_sequence(adev, + fiji_mgcg_cgcg_init, + ARRAY_SIZE(fiji_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_fiji_a10, + ARRAY_SIZE(golden_settings_fiji_a10)); break; case CHIP_TONGA: - amdgpu_program_register_sequence(adev, - tonga_mgcg_cgcg_init, - ARRAY_SIZE(tonga_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_tonga_a11, - ARRAY_SIZE(golden_settings_tonga_a11)); + amdgpu_device_program_register_sequence(adev, + tonga_mgcg_cgcg_init, + ARRAY_SIZE(tonga_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_tonga_a11, + ARRAY_SIZE(golden_settings_tonga_a11)); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 67e670989e81539e9de3ceac09a4d2be99a22407..26378bd6aba45a86e18e1d7ac122182a7730a811 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -154,28 +154,28 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_CARRIZO: - amdgpu_program_register_sequence(adev, - cz_mgcg_cgcg_init, - ARRAY_SIZE(cz_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - cz_golden_settings_a11, - ARRAY_SIZE(cz_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + cz_mgcg_cgcg_init, + ARRAY_SIZE(cz_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + cz_golden_settings_a11, + ARRAY_SIZE(cz_golden_settings_a11)); break; case CHIP_STONEY: - amdgpu_program_register_sequence(adev, - stoney_golden_settings_a11, - ARRAY_SIZE(stoney_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + stoney_golden_settings_a11, + ARRAY_SIZE(stoney_golden_settings_a11)); break; case CHIP_POLARIS11: case CHIP_POLARIS12: - amdgpu_program_register_sequence(adev, - polaris11_golden_settings_a11, - ARRAY_SIZE(polaris11_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + polaris11_golden_settings_a11, + ARRAY_SIZE(polaris11_golden_settings_a11)); break; case CHIP_POLARIS10: - amdgpu_program_register_sequence(adev, - polaris10_golden_settings_a11, - ARRAY_SIZE(polaris10_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + polaris10_golden_settings_a11, + ARRAY_SIZE(polaris10_golden_settings_a11)); break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index edef17d93527a585a8379ce4bc4c29f5a14b14b2..9870d83b68c17448df2df0ba770801d4c2329132 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1874,7 +1874,7 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { u32 header, control = 0; @@ -1889,7 +1889,7 @@ static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (vm_id << 24); + control |= ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, @@ -2354,7 +2354,7 @@ static void gfx_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) } static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); @@ -2362,10 +2362,10 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0))); - if (vm_id < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id )); + if (vmid < 8) { + amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid )); } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8))); + amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8))); } amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, pd_addr >> 12); @@ -2376,7 +2376,7 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, WRITE_DATA_DST_SEL(0))); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); /* wait for the invalidate to complete */ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 83d94c23aa782846367a90a828d7137d7a464aeb..a066c5eda135a782d8e01f730cfc10172210c7c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -48,6 +48,8 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" +#define NUM_SIMD_PER_CU 0x4 /* missing from the gfx_7 IP headers */ + #define GFX7_NUM_GFX_RINGS 1 #define GFX7_MEC_HPD_SIZE 2048 @@ -2252,7 +2254,7 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, */ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { u32 header, control = 0; @@ -2267,7 +2269,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (vm_id << 24); + control |= ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, @@ -2281,9 +2283,9 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); amdgpu_ring_write(ring, @@ -3237,19 +3239,19 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * using the CP (CIK). */ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); - if (vm_id < 8) { + if (vmid < 8) { amdgpu_ring_write(ring, - (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); + (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); } else { amdgpu_ring_write(ring, - (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); + (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); } amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, pd_addr >> 12); @@ -3260,7 +3262,7 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, WRITE_DATA_DST_SEL(0))); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); /* wait for the invalidate to complete */ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); @@ -5277,6 +5279,11 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; + cu_info->max_waves_per_simd = 10; + cu_info->max_scratch_slots_per_cu = 32; + cu_info->wave_front_size = 64; + cu_info->lds_size = 64; } const struct amdgpu_ip_block_version gfx_v7_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index d02493cf91759a9e497454dde0dc7a811aebe041..4e694ae9f3082442658af492a430d350722d3f0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -679,55 +679,55 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_TOPAZ: - amdgpu_program_register_sequence(adev, - iceland_mgcg_cgcg_init, - ARRAY_SIZE(iceland_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_iceland_a11, - ARRAY_SIZE(golden_settings_iceland_a11)); - amdgpu_program_register_sequence(adev, - iceland_golden_common_all, - ARRAY_SIZE(iceland_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + iceland_mgcg_cgcg_init, + ARRAY_SIZE(iceland_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_iceland_a11, + ARRAY_SIZE(golden_settings_iceland_a11)); + amdgpu_device_program_register_sequence(adev, + iceland_golden_common_all, + ARRAY_SIZE(iceland_golden_common_all)); break; case CHIP_FIJI: - amdgpu_program_register_sequence(adev, - fiji_mgcg_cgcg_init, - ARRAY_SIZE(fiji_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_fiji_a10, - ARRAY_SIZE(golden_settings_fiji_a10)); - amdgpu_program_register_sequence(adev, - fiji_golden_common_all, - ARRAY_SIZE(fiji_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + fiji_mgcg_cgcg_init, + ARRAY_SIZE(fiji_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_fiji_a10, + ARRAY_SIZE(golden_settings_fiji_a10)); + amdgpu_device_program_register_sequence(adev, + fiji_golden_common_all, + ARRAY_SIZE(fiji_golden_common_all)); break; case CHIP_TONGA: - amdgpu_program_register_sequence(adev, - tonga_mgcg_cgcg_init, - ARRAY_SIZE(tonga_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_tonga_a11, - ARRAY_SIZE(golden_settings_tonga_a11)); - amdgpu_program_register_sequence(adev, - tonga_golden_common_all, - ARRAY_SIZE(tonga_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + tonga_mgcg_cgcg_init, + ARRAY_SIZE(tonga_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_tonga_a11, + ARRAY_SIZE(golden_settings_tonga_a11)); + amdgpu_device_program_register_sequence(adev, + tonga_golden_common_all, + ARRAY_SIZE(tonga_golden_common_all)); break; case CHIP_POLARIS11: case CHIP_POLARIS12: - amdgpu_program_register_sequence(adev, - golden_settings_polaris11_a11, - ARRAY_SIZE(golden_settings_polaris11_a11)); - amdgpu_program_register_sequence(adev, - polaris11_golden_common_all, - ARRAY_SIZE(polaris11_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + golden_settings_polaris11_a11, + ARRAY_SIZE(golden_settings_polaris11_a11)); + amdgpu_device_program_register_sequence(adev, + polaris11_golden_common_all, + ARRAY_SIZE(polaris11_golden_common_all)); break; case CHIP_POLARIS10: - amdgpu_program_register_sequence(adev, - golden_settings_polaris10_a11, - ARRAY_SIZE(golden_settings_polaris10_a11)); - amdgpu_program_register_sequence(adev, - polaris10_golden_common_all, - ARRAY_SIZE(polaris10_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + golden_settings_polaris10_a11, + ARRAY_SIZE(golden_settings_polaris10_a11)); + amdgpu_device_program_register_sequence(adev, + polaris10_golden_common_all, + ARRAY_SIZE(polaris10_golden_common_all)); WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); if (adev->pdev->revision == 0xc7 && ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) || @@ -738,26 +738,26 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) } break; case CHIP_CARRIZO: - amdgpu_program_register_sequence(adev, - cz_mgcg_cgcg_init, - ARRAY_SIZE(cz_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - cz_golden_settings_a11, - ARRAY_SIZE(cz_golden_settings_a11)); - amdgpu_program_register_sequence(adev, - cz_golden_common_all, - ARRAY_SIZE(cz_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + cz_mgcg_cgcg_init, + ARRAY_SIZE(cz_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + cz_golden_settings_a11, + ARRAY_SIZE(cz_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + cz_golden_common_all, + ARRAY_SIZE(cz_golden_common_all)); break; case CHIP_STONEY: - amdgpu_program_register_sequence(adev, - stoney_mgcg_cgcg_init, - ARRAY_SIZE(stoney_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - stoney_golden_settings_a11, - ARRAY_SIZE(stoney_golden_settings_a11)); - amdgpu_program_register_sequence(adev, - stoney_golden_common_all, - ARRAY_SIZE(stoney_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + stoney_mgcg_cgcg_init, + ARRAY_SIZE(stoney_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + stoney_golden_settings_a11, + ARRAY_SIZE(stoney_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + stoney_golden_common_all, + ARRAY_SIZE(stoney_golden_common_all)); break; default: break; @@ -5062,8 +5062,9 @@ static int gfx_v8_0_hw_fini(void *handle) gfx_v8_0_cp_enable(adev, false); gfx_v8_0_rlc_stop(adev); - amdgpu_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_UNGATE); return 0; } @@ -5480,8 +5481,9 @@ static int gfx_v8_0_late_init(void *handle) if (r) return r; - amdgpu_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_GATE); return 0; } @@ -5492,10 +5494,10 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade if ((adev->asic_type == CHIP_POLARIS11) || (adev->asic_type == CHIP_POLARIS12)) /* Send msg to SMU via Powerplay */ - amdgpu_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_SMC, - enable ? - AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_SMC, + enable ? + AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); } @@ -6243,7 +6245,7 @@ static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { u32 header, control = 0; @@ -6252,7 +6254,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (vm_id << 24); + control |= ib->length_dw | (vmid << 24); if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); @@ -6273,9 +6275,9 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); amdgpu_ring_write(ring, @@ -6326,7 +6328,7 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) } static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); @@ -6334,12 +6336,12 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0)) | WR_CONFIRM); - if (vm_id < 8) { + if (vmid < 8) { amdgpu_ring_write(ring, - (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); + (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); } else { amdgpu_ring_write(ring, - (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); + (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); } amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, pd_addr >> 12); @@ -6351,7 +6353,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, WRITE_DATA_DST_SEL(0))); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); /* wait for the invalidate to complete */ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); @@ -6472,10 +6474,10 @@ static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, mutex_unlock(&adev->srbm_mutex); } static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, - enum amd_sched_priority priority) + enum drm_sched_priority priority) { struct amdgpu_device *adev = ring->adev; - bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW; + bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) return; @@ -7114,6 +7116,11 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; + cu_info->max_waves_per_simd = 10; + cu_info->max_scratch_slots_per_cu = 32; + cu_info->wave_front_size = 64; + cu_info->lds_size = 64; } const struct amdgpu_ip_block_version gfx_v8_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6c5289ae67be3f912edeac55c472e7374bef989a..c06479615e8a4c69fc3baa2f4802ffcb9dbefdb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -28,7 +28,6 @@ #include "soc15.h" #include "soc15d.h" -#include "soc15ip.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" #include "vega10_enum.h" @@ -65,152 +64,84 @@ MODULE_FIRMWARE("amdgpu/raven_mec.bin"); MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); -static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = -{ - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)}, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15) } +static const struct soc15_reg_golden golden_settings_gc_9_0[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) }; -static const u32 golden_settings_gc_9_0[] = -{ - SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420, - SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, - SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, - SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), 0x00001000, 0x00001000, - SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x01000107, - SOC15_REG_OFFSET(GC, 0, mmSQC_CONFIG), 0x03000000, 0x020a2000, - SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, - SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68, - SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197, - SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000, - SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff, - SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080 -}; - -static const u32 golden_settings_gc_9_0_vg10[] = +static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = { - SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0x0000f000, 0x00012107, - SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000, - SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x2a114042, - SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000, - SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, - SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800 + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800) }; -static const u32 golden_settings_gc_9_1[] = -{ - SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0xfffdf3cf, 0x00014104, - SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420, - SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, - SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, - SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, - SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x00000000, - SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0x00003120, - SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000, - SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000000ff, - SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080 +static const struct soc15_reg_golden golden_settings_gc_9_1[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) }; -static const u32 golden_settings_gc_9_1_rv1[] = +static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = { - SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000, - SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x24000042, - SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x24000042, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0xffffffff, 0x04048000, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_MODE_CNTL_1), 0x06000000, 0x06000000, - SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, - SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x01bd9f33, 0x00000800 + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) }; -static const u32 golden_settings_gc_9_x_common[] = +static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = { - SOC15_REG_OFFSET(GC, 0, mmGRBM_CAM_INDEX), 0xffffffff, 0x00000000, - SOC15_REG_OFFSET(GC, 0, mmGRBM_CAM_DATA), 0xffffffff, 0x2544c382 + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) }; #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 @@ -230,18 +161,18 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_gc_9_0, ARRAY_SIZE(golden_settings_gc_9_0)); - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_gc_9_0_vg10, ARRAY_SIZE(golden_settings_gc_9_0_vg10)); break; case CHIP_RAVEN: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_gc_9_1, ARRAY_SIZE(golden_settings_gc_9_1)); - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_gc_9_1_rv1, ARRAY_SIZE(golden_settings_gc_9_1_rv1)); break; @@ -249,7 +180,7 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) break; } - amdgpu_program_register_sequence(adev, golden_settings_gc_9_x_common, + soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); } @@ -1137,8 +1068,8 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; - adev->gfx.ngg.gds_reserve_addr = amdgpu_gds_reg_offset[0].mem_base; - adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size; + adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); + adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); /* Primitive Buffer */ r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], @@ -1243,23 +1174,24 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) } gfx_v9_0_write_data_to_reg(ring, 0, false, - amdgpu_gds_reg_offset[0].mem_size, + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), (adev->gds.mem.total_size + adev->gfx.ngg.gds_reserve_size) >> AMDGPU_GDS_SHIFT); amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | + PACKET3_DMA_DATA_DST_SEL(1) | PACKET3_DMA_DATA_SRC_SEL(2))); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size); - + amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | + adev->gfx.ngg.gds_reserve_size); gfx_v9_0_write_data_to_reg(ring, 0, false, - amdgpu_gds_reg_offset[0].mem_size, 0); + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); amdgpu_ring_commit(ring); @@ -1595,14 +1527,21 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < 16; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ - tmp = 0; - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, - SH_MEM_ALIGNMENT_MODE_UNALIGNED); - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0); + if (i == 0) { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0); + } else { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); + tmp = adev->mc.shared_aperture_start >> 48; + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); + } } soc15_grbm_select(adev, 0, 0, 0, 0); @@ -2474,7 +2413,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) PACKET3_MAP_QUEUES_PIPE(ring->pipe) | PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ - PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); @@ -3146,6 +3085,8 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, uint32_t gws_base, uint32_t gws_size, uint32_t oa_base, uint32_t oa_size) { + struct amdgpu_device *adev = ring->adev; + gds_base = gds_base >> AMDGPU_GDS_SHIFT; gds_size = gds_size >> AMDGPU_GDS_SHIFT; @@ -3157,22 +3098,22 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, /* GDS Base */ gfx_v9_0_write_data_to_reg(ring, 0, false, - amdgpu_gds_reg_offset[vmid].mem_base, + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, gds_base); /* GDS Size */ gfx_v9_0_write_data_to_reg(ring, 0, false, - amdgpu_gds_reg_offset[vmid].mem_size, + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, gds_size); /* GWS */ gfx_v9_0_write_data_to_reg(ring, 0, false, - amdgpu_gds_reg_offset[vmid].gws, + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); /* OA */ gfx_v9_0_write_data_to_reg(ring, 0, false, - amdgpu_gds_reg_offset[vmid].oa, + SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, (1 << (oa_size + oa_base)) - (1 << oa_base)); } @@ -3617,13 +3558,9 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg; - - if (ring->adev->flags & AMD_IS_APU) - nbio_hf_reg = &nbio_v7_0_hdp_flush_reg; - else - nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { switch (ring->me) { @@ -3643,20 +3580,22 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, - nbio_hf_reg->hdp_flush_req_offset, - nbio_hf_reg->hdp_flush_done_offset, + adev->nbio_funcs->get_hdp_flush_req_offset(adev), + adev->nbio_funcs->get_hdp_flush_done_offset(adev), ref_and_mask, ref_and_mask, 0x20); } static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + gfx_v9_0_write_data_to_reg(ring, 0, true, SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); } static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { u32 header, control = 0; @@ -3665,7 +3604,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (vm_id << 24); + control |= ib->length_dw | (vmid << 24); if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); @@ -3687,9 +3626,9 @@ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ @@ -3745,22 +3684,23 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) } static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->ctx0_ptb_addr_lo32 + (2 * vm_id), + hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->ctx0_ptb_addr_hi32 + (2 * vm_id), + hub->ctx0_ptb_addr_hi32 + (2 * vmid), upper_32_bits(pd_addr)); gfx_v9_0_write_data_to_reg(ring, usepfp, true, @@ -3768,7 +3708,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* wait for the invalidate to complete */ gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + - eng, 0, 1 << vm_id, 1 << vm_id, 0x20); + eng, 0, 1 << vmid, 1 << vmid, 0x20); /* compute doesn't have PFP */ if (usepfp) { @@ -3811,6 +3751,8 @@ static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned int flags) { + struct amdgpu_device *adev = ring->adev; + /* we only allocate 32bit for each seq wb address */ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index f1effadfbaa68645b4eccd78e77879aad7ff4d0d..56f5fe4e2feefdc5b0c5a17620e48fa26b903989 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -23,7 +23,6 @@ #include "amdgpu.h" #include "gfxhub_v1_0.h" -#include "soc15ip.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" #include "gc/gc_9_0_default.h" @@ -144,8 +143,15 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); tmp = mmVM_L2_CNTL3_DEFAULT; - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + if (adev->mc.translate_further) { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); tmp = mmVM_L2_CNTL4_DEFAULT; @@ -183,31 +189,40 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { - int i; + unsigned num_level, block_size; uint32_t tmp; + int i; + + num_level = adev->vm_manager.num_level; + block_size = adev->vm_manager.block_size; + if (adev->mc.translate_further) + num_level -= 1; + else + block_size -= 9; for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, - adev->vm_manager.num_level); + num_level); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PAGE_TABLE_BLOCK_SIZE, - adev->vm_manager.block_size - 9); + PAGE_TABLE_BLOCK_SIZE, + block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 468281f10e8d21b880e7e043bc97d015c14f9cdd..8e28270d1ea969e96b37d6c18bd7e9f2bc50c748 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -222,8 +222,8 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_vram_location(adev, &adev->mc, base); - amdgpu_gart_location(adev, mc); + amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_gart_location(adev, mc); } static void gmc_v6_0_mc_program(struct amdgpu_device *adev) @@ -395,10 +395,10 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } -static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) +static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level, + uint64_t *addr, uint64_t *flags) { - BUG_ON(addr & 0xFFFFFF0000000FFFULL); - return addr; + BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, @@ -956,7 +956,7 @@ static int gmc_v6_0_resume(void *handle) if (r) return r; - amdgpu_vm_reset_all_ids(adev); + amdgpu_vmid_reset_all(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 68a85051f4b705a39977d050003e36d575fb4418..86e9d682c59e8760d0751d38908a4c072764e123 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -67,12 +67,12 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_TOPAZ: - amdgpu_program_register_sequence(adev, - iceland_mgcg_cgcg_init, - ARRAY_SIZE(iceland_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_iceland_a11, - ARRAY_SIZE(golden_settings_iceland_a11)); + amdgpu_device_program_register_sequence(adev, + iceland_mgcg_cgcg_init, + ARRAY_SIZE(iceland_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_iceland_a11, + ARRAY_SIZE(golden_settings_iceland_a11)); break; default: break; @@ -240,8 +240,8 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_vram_location(adev, &adev->mc, base); - amdgpu_gart_location(adev, mc); + amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_gart_location(adev, mc); } /** @@ -480,10 +480,10 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } -static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) +static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level, + uint64_t *addr, uint64_t *flags) { - BUG_ON(addr & 0xFFFFFF0000000FFFULL); - return addr; + BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } /** @@ -1107,7 +1107,7 @@ static int gmc_v7_0_resume(void *handle) if (r) return r; - amdgpu_vm_reset_all_ids(adev); + amdgpu_vmid_reset_all(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 46ec97e70e5c175412b7eee2a55dc7b800c315f7..9a813d834f1a21e72908cea34a0641343c848e30 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -120,44 +120,44 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_FIJI: - amdgpu_program_register_sequence(adev, - fiji_mgcg_cgcg_init, - ARRAY_SIZE(fiji_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_fiji_a10, - ARRAY_SIZE(golden_settings_fiji_a10)); + amdgpu_device_program_register_sequence(adev, + fiji_mgcg_cgcg_init, + ARRAY_SIZE(fiji_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_fiji_a10, + ARRAY_SIZE(golden_settings_fiji_a10)); break; case CHIP_TONGA: - amdgpu_program_register_sequence(adev, - tonga_mgcg_cgcg_init, - ARRAY_SIZE(tonga_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_tonga_a11, - ARRAY_SIZE(golden_settings_tonga_a11)); + amdgpu_device_program_register_sequence(adev, + tonga_mgcg_cgcg_init, + ARRAY_SIZE(tonga_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_tonga_a11, + ARRAY_SIZE(golden_settings_tonga_a11)); break; case CHIP_POLARIS11: case CHIP_POLARIS12: - amdgpu_program_register_sequence(adev, - golden_settings_polaris11_a11, - ARRAY_SIZE(golden_settings_polaris11_a11)); + amdgpu_device_program_register_sequence(adev, + golden_settings_polaris11_a11, + ARRAY_SIZE(golden_settings_polaris11_a11)); break; case CHIP_POLARIS10: - amdgpu_program_register_sequence(adev, - golden_settings_polaris10_a11, - ARRAY_SIZE(golden_settings_polaris10_a11)); + amdgpu_device_program_register_sequence(adev, + golden_settings_polaris10_a11, + ARRAY_SIZE(golden_settings_polaris10_a11)); break; case CHIP_CARRIZO: - amdgpu_program_register_sequence(adev, - cz_mgcg_cgcg_init, - ARRAY_SIZE(cz_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + cz_mgcg_cgcg_init, + ARRAY_SIZE(cz_mgcg_cgcg_init)); break; case CHIP_STONEY: - amdgpu_program_register_sequence(adev, - stoney_mgcg_cgcg_init, - ARRAY_SIZE(stoney_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_stoney_common, - ARRAY_SIZE(golden_settings_stoney_common)); + amdgpu_device_program_register_sequence(adev, + stoney_mgcg_cgcg_init, + ARRAY_SIZE(stoney_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_stoney_common, + ARRAY_SIZE(golden_settings_stoney_common)); break; default: break; @@ -405,8 +405,8 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_vram_location(adev, &adev->mc, base); - amdgpu_gart_location(adev, mc); + amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_gart_location(adev, mc); } /** @@ -677,10 +677,10 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } -static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) +static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level, + uint64_t *addr, uint64_t *flags) { - BUG_ON(addr & 0xFFFFFF0000000FFFULL); - return addr; + BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } /** @@ -1212,7 +1212,7 @@ static int gmc_v8_0_resume(void *handle) if (r) return r; - amdgpu_vm_reset_all_ids(adev); + amdgpu_vmid_reset_all(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index cc972153d401ce1bc507a0e196e5148d22acdbc8..2719937e09d6bf00a4f78c47cd970ce5f5a51000 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -25,7 +25,6 @@ #include "gmc_v9_0.h" #include "amdgpu_atomfirmware.h" -#include "soc15ip.h" #include "hdp/hdp_4_0_offset.h" #include "hdp/hdp_4_0_sh_mask.h" #include "gc/gc_9_0_sh_mask.h" @@ -35,11 +34,10 @@ #include "mmhub/mmhub_1_0_offset.h" #include "athub/athub_1_0_offset.h" +#include "soc15.h" #include "soc15_common.h" #include "umc/umc_6_0_sh_mask.h" -#include "nbio_v6_1.h" -#include "nbio_v7_0.h" #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" @@ -74,16 +72,16 @@ static const u32 golden_settings_vega10_hdp[] = 0xf6e, 0x0fffffff, 0x00000000, }; -static const u32 golden_settings_mmhub_1_0_0[] = +static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] = { - SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_WRCLI2), 0x00000007, 0xfe5fe0fa, - SOC15_REG_OFFSET(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0), 0x00000030, 0x55555565 + SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa), + SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565) }; -static const u32 golden_settings_athub_1_0_0[] = +static const struct soc15_reg_golden golden_settings_athub_1_0_0[] = { - SOC15_REG_OFFSET(ATHUB, 0, mmRPB_ARB_CNTL), 0x0000ff00, 0x00000800, - SOC15_REG_OFFSET(ATHUB, 0, mmRPB_ARB_CNTL2), 0x00ff00ff, 0x00080008 + SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800), + SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008) }; /* Ecc related register addresses, (BASE + reg offset) */ @@ -250,7 +248,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - struct amdgpu_vmhub *hub = &adev->vmhub[entry->vm_id_src]; + struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; uint32_t status = 0; u64 addr; @@ -264,9 +262,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (printk_ratelimit()) { dev_err(adev->dev, - "[%s] VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u)\n", - entry->vm_id_src ? "mmhub" : "gfxhub", - entry->src_id, entry->ring_id, entry->vm_id, + "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pas_id:%u)\n", + entry->vmid_src ? "mmhub" : "gfxhub", + entry->src_id, entry->ring_id, entry->vmid, entry->pas_id); dev_err(adev->dev, " at page 0x%016llx from %d\n", addr, entry->client_id); @@ -290,13 +288,13 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs; } -static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vm_id) +static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) { u32 req = 0; - /* invalidate using legacy mode on vm_id*/ + /* invalidate using legacy mode on vmid*/ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, - PER_VMID_INVALIDATE_REQ, 1 << vm_id); + PER_VMID_INVALIDATE_REQ, 1 << vmid); req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); @@ -332,10 +330,7 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, unsigned i, j; /* flush hdp cache */ - if (adev->flags & AMD_IS_APU) - nbio_v7_0_hdp_flush(adev); - else - nbio_v6_1_hdp_flush(adev); + adev->nbio_funcs->hdp_flush(adev); spin_lock(&adev->mc.invalidate_lock); @@ -474,11 +469,28 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } -static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr) +static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, + uint64_t *addr, uint64_t *flags) { - addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start; - BUG_ON(addr & 0xFFFF00000000003FULL); - return addr; + if (!(*flags & AMDGPU_PDE_PTE)) + *addr = adev->vm_manager.vram_base_offset + *addr - + adev->mc.vram_start; + BUG_ON(*addr & 0xFFFF00000000003FULL); + + if (!adev->mc.translate_further) + return; + + if (level == AMDGPU_VM_PDB1) { + /* Set the block fragment size */ + if (!(*flags & AMDGPU_PDE_PTE)) + *flags |= AMDGPU_PDE_BFS(0x9); + + } else if (level == AMDGPU_VM_PDB0) { + if (*flags & AMDGPU_PDE_PTE) + *flags &= ~AMDGPU_PDE_PTE; + else + *flags |= AMDGPU_PTE_TF; + } } static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { @@ -502,6 +514,14 @@ static int gmc_v9_0_early_init(void *handle) gmc_v9_0_set_gart_funcs(adev); gmc_v9_0_set_irq_funcs(adev); + adev->mc.shared_aperture_start = 0x2000000000000000ULL; + adev->mc.shared_aperture_end = + adev->mc.shared_aperture_start + (4ULL << 30) - 1; + adev->mc.private_aperture_start = + adev->mc.shared_aperture_end + 1; + adev->mc.private_aperture_end = + adev->mc.private_aperture_start + (4ULL << 30) - 1; + return 0; } @@ -614,14 +634,16 @@ static int gmc_v9_0_late_init(void *handle) for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i) BUG_ON(vm_inv_eng[i] > 16); - r = gmc_v9_0_ecc_available(adev); - if (r == 1) { - DRM_INFO("ECC is active.\n"); - } else if (r == 0) { - DRM_INFO("ECC is not present.\n"); - } else { - DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r); - return r; + if (adev->asic_type == CHIP_VEGA10) { + r = gmc_v9_0_ecc_available(adev); + if (r == 1) { + DRM_INFO("ECC is active.\n"); + } else if (r == 0) { + DRM_INFO("ECC is not present.\n"); + } else { + DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r); + return r; + } } return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); @@ -633,8 +655,8 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = 0; if (!amdgpu_sriov_vf(adev)) base = mmhub_v1_0_get_fb_location(adev); - amdgpu_vram_location(adev, &adev->mc, base); - amdgpu_gart_location(adev, mc); + amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_gart_location(adev, mc); /* base offset of vram pages */ if (adev->flags & AMD_IS_APU) adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); @@ -700,8 +722,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) /* size in MB on si */ adev->mc.mc_vram_size = - ((adev->flags & AMD_IS_APU) ? nbio_v7_0_get_memsize(adev) : - nbio_v6_1_get_memsize(adev)) * 1024ULL * 1024ULL; + adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; adev->mc.real_vram_size = adev->mc.mc_vram_size; if (!(adev->flags & AMD_IS_APU)) { @@ -769,11 +790,14 @@ static int gmc_v9_0_sw_init(void *handle) switch (adev->asic_type) { case CHIP_RAVEN: adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; - if (adev->rev_id == 0x0 || adev->rev_id == 0x1) + if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); - else - /* vm_size is 64GB for legacy 2-level page support */ - amdgpu_vm_adjust_size(adev, 64, 9, 1, 48); + } else { + /* vm_size is 128TB + 512GB for legacy 3-level page support */ + amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48); + adev->mc.translate_further = + adev->vm_manager.num_level > 1; + } break; case CHIP_VEGA10: /* XXX Don't know how to get VRAM type yet. */ @@ -883,17 +907,18 @@ static int gmc_v9_0_sw_fini(void *handle) static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) { + switch (adev->asic_type) { case CHIP_VEGA10: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_mmhub_1_0_0, ARRAY_SIZE(golden_settings_mmhub_1_0_0)); - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_athub_1_0_0, ARRAY_SIZE(golden_settings_athub_1_0_0)); break; case CHIP_RAVEN: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_athub_1_0_0, ARRAY_SIZE(golden_settings_athub_1_0_0)); break; @@ -913,9 +938,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) bool value; u32 tmp; - amdgpu_program_register_sequence(adev, - golden_settings_vega10_hdp, - ARRAY_SIZE(golden_settings_vega10_hdp)); + amdgpu_device_program_register_sequence(adev, + golden_settings_vega10_hdp, + ARRAY_SIZE(golden_settings_vega10_hdp)); if (adev->gart.robj == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); @@ -948,10 +973,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); /* After HDP is initialized, flush HDP.*/ - if (adev->flags & AMD_IS_APU) - nbio_v7_0_hdp_flush(adev); - else - nbio_v6_1_hdp_flush(adev); + adev->nbio_funcs->hdp_flush(adev); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; @@ -1036,7 +1058,7 @@ static int gmc_v9_0_resume(void *handle) if (r) return r; - amdgpu_vm_reset_all_ids(adev); + amdgpu_vmid_reset_all(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index bd592cb39f3708d72c645aecda67ca09fd521272..c4e4be3dd31d87de5af944b50ec97e2e2bac628e 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -259,7 +259,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev, entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; - entry->vm_id = (dw[2] >> 8) & 0xff; + entry->vmid = (dw[2] >> 8) & 0xff; entry->pas_id = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index f33d1ffdb20b2dd438ff014de4ce5e509ba0488d..d9e9e52a0defab36a12057ed2354b4bf7aebdd73 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -1682,8 +1682,8 @@ static void kv_dpm_powergate_uvd(void *handle, bool gate) if (gate) { /* stop the UVD block */ - ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_GATE); + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); kv_update_uvd_dpm(adev, gate); if (pi->caps_uvd_pg) /* power off the UVD block */ @@ -1695,8 +1695,8 @@ static void kv_dpm_powergate_uvd(void *handle, bool gate) /* re-init the UVD block */ kv_update_uvd_dpm(adev, gate); - ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_UNGATE); + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_UNGATE); } } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index bd160d8700e0fcb816f44e276fa0ead1391c0fa5..ffd5b7ee49c46d7413d0b225db7fbc39f3ef5a93 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -23,7 +23,6 @@ #include "amdgpu.h" #include "mmhub_v1_0.h" -#include "soc15ip.h" #include "mmhub/mmhub_1_0_offset.h" #include "mmhub/mmhub_1_0_sh_mask.h" #include "mmhub/mmhub_1_0_default.h" @@ -156,10 +155,15 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); - tmp = mmVM_L2_CNTL3_DEFAULT; - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); + if (adev->mc.translate_further) { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } tmp = mmVM_L2_CNTL4_DEFAULT; tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); @@ -197,32 +201,40 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { - int i; + unsigned num_level, block_size; uint32_t tmp; + int i; + + num_level = adev->vm_manager.num_level; + block_size = adev->vm_manager.block_size; + if (adev->mc.translate_further) + num_level -= 1; + else + block_size -= 9; for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + num_level); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - ENABLE_CONTEXT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PAGE_TABLE_DEPTH, adev->vm_manager.num_level); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PAGE_TABLE_BLOCK_SIZE, - adev->vm_manager.block_size - 9); + PAGE_TABLE_BLOCK_SIZE, + block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index ad9054e3903c3b1577670c6734e7b56d426ea03d..271452d3999a1d04b571a8ebc3e075a11b1f5db9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -22,7 +22,6 @@ */ #include "amdgpu.h" -#include "soc15ip.h" #include "nbio/nbio_6_1_offset.h" #include "nbio/nbio_6_1_sh_mask.h" #include "gc/gc_9_0_offset.h" @@ -254,7 +253,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } /* Trigger recovery due to world switch failure */ - amdgpu_gpu_recover(adev, NULL); + amdgpu_device_gpu_recover(adev, NULL, false); } static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -278,7 +277,7 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, int r; /* trigger gpu-reset by hypervisor only if TDR disbaled */ - if (amdgpu_lockup_timeout == 0) { + if (!amdgpu_gpu_recovery) { /* see what event we get */ r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index df52824c0cd416154251847b247b4ef638c99c03..9fc1c37344cebeaa468941d15552fe5e15b102e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -279,32 +279,32 @@ void xgpu_vi_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_FIJI: - amdgpu_program_register_sequence(adev, - xgpu_fiji_mgcg_cgcg_init, - ARRAY_SIZE( - xgpu_fiji_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - xgpu_fiji_golden_settings_a10, - ARRAY_SIZE( - xgpu_fiji_golden_settings_a10)); - amdgpu_program_register_sequence(adev, - xgpu_fiji_golden_common_all, - ARRAY_SIZE( - xgpu_fiji_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + xgpu_fiji_mgcg_cgcg_init, + ARRAY_SIZE( + xgpu_fiji_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + xgpu_fiji_golden_settings_a10, + ARRAY_SIZE( + xgpu_fiji_golden_settings_a10)); + amdgpu_device_program_register_sequence(adev, + xgpu_fiji_golden_common_all, + ARRAY_SIZE( + xgpu_fiji_golden_common_all)); break; case CHIP_TONGA: - amdgpu_program_register_sequence(adev, - xgpu_tonga_mgcg_cgcg_init, - ARRAY_SIZE( - xgpu_tonga_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - xgpu_tonga_golden_settings_a11, - ARRAY_SIZE( - xgpu_tonga_golden_settings_a11)); - amdgpu_program_register_sequence(adev, - xgpu_tonga_golden_common_all, - ARRAY_SIZE( - xgpu_tonga_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + xgpu_tonga_mgcg_cgcg_init, + ARRAY_SIZE( + xgpu_tonga_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + xgpu_tonga_golden_settings_a11, + ARRAY_SIZE( + xgpu_tonga_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + xgpu_tonga_golden_common_all, + ARRAY_SIZE( + xgpu_tonga_golden_common_all)); break; default: BUG_ON("Doesn't support chip type.\n"); @@ -521,7 +521,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) } /* Trigger recovery due to world switch failure */ - amdgpu_gpu_recover(adev, NULL); + amdgpu_device_gpu_recover(adev, NULL, false); } static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -545,7 +545,7 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev, int r; /* trigger gpu-reset by hypervisor only if TDR disbaled */ - if (amdgpu_lockup_timeout == 0) { + if (!amdgpu_gpu_recovery) { /* see what event we get */ r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 76db711097c7c0d51d7b8b83f4f1c1211a5a6a0a..d4da663d5eb0c7aed8826c72302008b757cd8d68 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -24,7 +24,6 @@ #include "amdgpu_atombios.h" #include "nbio_v6_1.h" -#include "soc15ip.h" #include "nbio/nbio_6_1_default.h" #include "nbio/nbio_6_1_offset.h" #include "nbio/nbio_6_1_sh_mask.h" @@ -34,7 +33,7 @@ #define smnPCIE_CNTL2 0x11180070 #define smnPCIE_CONFIG_CNTL 0x11180044 -u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) +static u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -44,19 +43,7 @@ u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) return tmp; } -u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx) -{ - return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx); -} - -void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx, uint32_t val) -{ - WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val); -} - -void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) +static void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) { if (enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, @@ -66,26 +53,23 @@ void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); } -void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) +static void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) { WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); } -u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) +static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) { return RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE); } -static const u32 nbio_sdma_doorbell_range_reg[] = -{ - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE), - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE) -}; - -void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, +static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, bool use_doorbell, int doorbell_index) { - u32 doorbell_range = RREG32(nbio_sdma_doorbell_range_reg[instance]); + u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); + + u32 doorbell_range = RREG32(reg); if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); @@ -93,17 +77,18 @@ void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, } else doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); - WREG32(nbio_sdma_doorbell_range_reg[instance], doorbell_range); + WREG32(reg, doorbell_range); + } -void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) +static void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) { WREG32_FIELD15(NBIO, 0, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0); } -void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, - bool enable) +static void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) { u32 tmp = 0; @@ -122,8 +107,8 @@ void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, } -void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index) +static void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) { u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); @@ -136,7 +121,7 @@ void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range); } -void nbio_v6_1_ih_control(struct amdgpu_device *adev) +static void nbio_v6_1_ih_control(struct amdgpu_device *adev) { u32 interrupt_cntl; @@ -152,8 +137,8 @@ void nbio_v6_1_ih_control(struct amdgpu_device *adev) WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); } -void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) +static void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) { uint32_t def, data; @@ -180,8 +165,8 @@ void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, WREG32_PCIE(smnCPM_CONTROL, data); } -void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, - bool enable) +static void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) { uint32_t def, data; @@ -200,7 +185,8 @@ void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, WREG32_PCIE(smnPCIE_CNTL2, data); } -void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags) +static void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) { int data; @@ -215,9 +201,27 @@ void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags) *flags |= AMD_CG_SUPPORT_BIF_LS; } -const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { - .hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ), - .hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE), +static u32 nbio_v6_1_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ); +} + +static u32 nbio_v6_1_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE); +} + +static u32 nbio_v6_1_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX); +} + +static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA); +} + +static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK, @@ -232,12 +236,7 @@ const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK }; -const struct nbio_pcie_index_data nbio_v6_1_pcie_index_data = { - .index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX), - .data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA), -}; - -void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) +static void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) { uint32_t reg; @@ -254,7 +253,7 @@ void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) } } -void nbio_v6_1_init_registers(struct amdgpu_device *adev) +static void nbio_v6_1_init_registers(struct amdgpu_device *adev) { uint32_t def, data; @@ -265,3 +264,25 @@ void nbio_v6_1_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_CONFIG_CNTL, data); } + +const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { + .hdp_flush_reg = &nbio_v6_1_hdp_flush_reg, + .get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset, + .get_pcie_data_offset = nbio_v6_1_get_pcie_data_offset, + .get_rev_id = nbio_v6_1_get_rev_id, + .mc_access_enable = nbio_v6_1_mc_access_enable, + .hdp_flush = nbio_v6_1_hdp_flush, + .get_memsize = nbio_v6_1_get_memsize, + .sdma_doorbell_range = nbio_v6_1_sdma_doorbell_range, + .enable_doorbell_aperture = nbio_v6_1_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbio_v6_1_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbio_v6_1_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v6_1_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v6_1_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v6_1_get_clockgating_state, + .ih_control = nbio_v6_1_ih_control, + .init_registers = nbio_v6_1_init_registers, + .detect_hw_virt = nbio_v6_1_detect_hw_virt, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h index 14ca8d45a46c6cb059eaeb2ccad6ca365e93a0bd..0743a6f016f37cfb793166c54c78050c8d199d67 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h @@ -26,30 +26,6 @@ #include "soc15_common.h" -extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg; -extern const struct nbio_pcie_index_data nbio_v6_1_pcie_index_data; -int nbio_v6_1_init(struct amdgpu_device *adev); -u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx); -void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx, uint32_t val); -void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable); -void nbio_v6_1_hdp_flush(struct amdgpu_device *adev); -u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev); -void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index); -void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable); -void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, - bool enable); -void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index); -void nbio_v6_1_ih_control(struct amdgpu_device *adev); -u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev); -void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable); -void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable); -void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags); -void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev); -void nbio_v6_1_init_registers(struct amdgpu_device *adev); +extern const struct amdgpu_nbio_funcs nbio_v6_1_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 1fb77174e02c5a1df43a97bbf286ccb8fb35e463..17a9131a459846b9a6075d517af548b4c1efd05e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -24,7 +24,6 @@ #include "amdgpu_atombios.h" #include "nbio_v7_0.h" -#include "soc15ip.h" #include "nbio/nbio_7_0_default.h" #include "nbio/nbio_7_0_offset.h" #include "nbio/nbio_7_0_sh_mask.h" @@ -32,7 +31,10 @@ #define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c -u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) +#define smnCPM_CONTROL 0x11180460 +#define smnPCIE_CNTL2 0x11180070 + +static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -42,19 +44,7 @@ u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) return tmp; } -u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx) -{ - return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx); -} - -void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx, uint32_t val) -{ - WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val); -} - -void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) +static void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) { if (enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, @@ -63,26 +53,23 @@ void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); } -void nbio_v7_0_hdp_flush(struct amdgpu_device *adev) +static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev) { WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); } -u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) +static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) { return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE); } -static const u32 nbio_sdma_doorbell_range_reg[] = +static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index) { - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE), - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE) -}; + u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); -void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index) -{ - u32 doorbell_range = RREG32(nbio_sdma_doorbell_range_reg[instance]); + u32 doorbell_range = RREG32(reg); if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); @@ -90,17 +77,23 @@ void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, } else doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); - WREG32(nbio_sdma_doorbell_range_reg[instance], doorbell_range); + WREG32(reg, doorbell_range); } -void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) +static void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) { WREG32_FIELD15(NBIO, 0, RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0); } -void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index) +static void nbio_v7_0_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) +{ + +} + +static void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) { u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); @@ -130,8 +123,8 @@ static void nbio_7_0_write_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t o WREG32_SOC15(NBIO, 0, mmSYSHUB_DATA, data); } -void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) +static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) { uint32_t def, data; @@ -169,7 +162,43 @@ void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK, data); } -void nbio_v7_0_ih_control(struct amdgpu_device *adev) +static void nbio_v7_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_CNTL2); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { + data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_PCIE(smnPCIE_CNTL2, data); +} + +static void nbio_v7_0_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_PCIE(smnCPM_CONTROL); + if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_PCIE(smnPCIE_CNTL2); + if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + +static void nbio_v7_0_ih_control(struct amdgpu_device *adev) { u32 interrupt_cntl; @@ -185,9 +214,27 @@ void nbio_v7_0_ih_control(struct amdgpu_device *adev) WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); } +static u32 nbio_v7_0_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ); +} + +static u32 nbio_v7_0_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE); +} + +static u32 nbio_v7_0_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2); +} + +static u32 nbio_v7_0_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); +} + const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = { - .hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ), - .hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE), .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, @@ -202,7 +249,35 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = { .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_pcie_index_data nbio_v7_0_pcie_index_data = { - .index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2), - .data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2) +static void nbio_v7_0_detect_hw_virt(struct amdgpu_device *adev) +{ + if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ + adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; +} + +static void nbio_v7_0_init_registers(struct amdgpu_device *adev) +{ + +} + +const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { + .hdp_flush_reg = &nbio_v7_0_hdp_flush_reg, + .get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset, + .get_pcie_data_offset = nbio_v7_0_get_pcie_data_offset, + .get_rev_id = nbio_v7_0_get_rev_id, + .mc_access_enable = nbio_v7_0_mc_access_enable, + .hdp_flush = nbio_v7_0_hdp_flush, + .get_memsize = nbio_v7_0_get_memsize, + .sdma_doorbell_range = nbio_v7_0_sdma_doorbell_range, + .enable_doorbell_aperture = nbio_v7_0_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbio_v7_0_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbio_v7_0_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v7_0_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v7_0_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v7_0_get_clockgating_state, + .ih_control = nbio_v7_0_ih_control, + .init_registers = nbio_v7_0_init_registers, + .detect_hw_virt = nbio_v7_0_detect_hw_virt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h index df8fa90f40d7f6a4ed8ef05611233f7171ae44ee..508d549c50291fc89a40539b2a33642275b74a0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h @@ -26,24 +26,6 @@ #include "soc15_common.h" -extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg; -extern const struct nbio_pcie_index_data nbio_v7_0_pcie_index_data; -int nbio_v7_0_init(struct amdgpu_device *adev); -u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx); -void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx, uint32_t val); -void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable); -void nbio_v7_0_hdp_flush(struct amdgpu_device *adev); -u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev); -void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index); -void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable); -void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index); -void nbio_v7_0_ih_control(struct amdgpu_device *adev); -u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev); -void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable); +extern const struct amdgpu_nbio_funcs nbio_v7_0_funcs; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 78fe3f2917a08f47bb92651fc2705fef30bc128c..5a9fe24697f93832f0c7fd9e8e700760f250be91 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -30,7 +30,6 @@ #include "soc15_common.h" #include "psp_v10_0.h" -#include "soc15ip.h" #include "mp/mp_10_0_offset.h" #include "gc/gc_9_1_offset.h" #include "sdma0/sdma0_4_1_offset.h" @@ -298,9 +297,10 @@ int psp_v10_0_cmd_submit(struct psp_context *psp, } static int -psp_v10_0_sram_map(unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, - unsigned int *sram_data_reg_offset, - enum AMDGPU_UCODE_ID ucode_id) +psp_v10_0_sram_map(struct amdgpu_device *adev, + unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) { int ret = 0; @@ -395,7 +395,7 @@ bool psp_v10_0_compare_sram_data(struct psp_context *psp, uint32_t *ucode_mem = NULL; struct amdgpu_device *adev = psp->adev; - err = psp_v10_0_sram_map(&fw_sram_reg_val, &fw_sram_addr_reg_offset, + err = psp_v10_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset, &fw_sram_data_reg_offset, ucode_type); if (err) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index e75a23d858ef1a2ffc068955ad6954563025b16e..19bd1934e63d7896acf88d50995e92011b69dcf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -31,7 +31,6 @@ #include "soc15_common.h" #include "psp_v3_1.h" -#include "soc15ip.h" #include "mp/mp_9_0_offset.h" #include "mp/mp_9_0_sh_mask.h" #include "gc/gc_9_0_offset.h" @@ -410,9 +409,10 @@ int psp_v3_1_cmd_submit(struct psp_context *psp, } static int -psp_v3_1_sram_map(unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, - unsigned int *sram_data_reg_offset, - enum AMDGPU_UCODE_ID ucode_id) +psp_v3_1_sram_map(struct amdgpu_device *adev, + unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) { int ret = 0; @@ -507,7 +507,7 @@ bool psp_v3_1_compare_sram_data(struct psp_context *psp, uint32_t *ucode_mem = NULL; struct amdgpu_device *adev = psp->adev; - err = psp_v3_1_sram_map(&fw_sram_reg_val, &fw_sram_addr_reg_offset, + err = psp_v3_1_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset, &fw_sram_data_reg_offset, ucode_type); if (err) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 121e628e7cdb0ee16902733994f501d491ef539f..d4787ad4d346b0592f8e90103410df3cf860c59e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -93,12 +93,12 @@ static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_TOPAZ: - amdgpu_program_register_sequence(adev, - iceland_mgcg_cgcg_init, - ARRAY_SIZE(iceland_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_iceland_a11, - ARRAY_SIZE(golden_settings_iceland_a11)); + amdgpu_device_program_register_sequence(adev, + iceland_mgcg_cgcg_init, + ARRAY_SIZE(iceland_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_iceland_a11, + ARRAY_SIZE(golden_settings_iceland_a11)); break; default: break; @@ -246,15 +246,13 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) */ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 vmid = vm_id & 0xf; - /* IB packet must end on a 8 DW boundary */ sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | - SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); /* base must be 32 byte aligned */ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); @@ -600,7 +598,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; @@ -613,7 +611,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -639,7 +637,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) ring->idx, tmp); r = -EINVAL; } - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -662,7 +660,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) u64 gpu_addr; long r; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; @@ -715,7 +713,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -861,14 +859,14 @@ static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * using sDMA (VI). */ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - if (vm_id < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); + if (vmid < 8) { + amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); + amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); } amdgpu_ring_write(ring, pd_addr >> 12); @@ -876,7 +874,7 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); /* wait for flush */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index c8c93f9dac2143c2fdbf49a0087eb55271f49cb4..521978c4053744abae0061ea04d41af362a9f443 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -192,47 +192,47 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_FIJI: - amdgpu_program_register_sequence(adev, - fiji_mgcg_cgcg_init, - ARRAY_SIZE(fiji_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_fiji_a10, - ARRAY_SIZE(golden_settings_fiji_a10)); + amdgpu_device_program_register_sequence(adev, + fiji_mgcg_cgcg_init, + ARRAY_SIZE(fiji_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_fiji_a10, + ARRAY_SIZE(golden_settings_fiji_a10)); break; case CHIP_TONGA: - amdgpu_program_register_sequence(adev, - tonga_mgcg_cgcg_init, - ARRAY_SIZE(tonga_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_tonga_a11, - ARRAY_SIZE(golden_settings_tonga_a11)); + amdgpu_device_program_register_sequence(adev, + tonga_mgcg_cgcg_init, + ARRAY_SIZE(tonga_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_tonga_a11, + ARRAY_SIZE(golden_settings_tonga_a11)); break; case CHIP_POLARIS11: case CHIP_POLARIS12: - amdgpu_program_register_sequence(adev, - golden_settings_polaris11_a11, - ARRAY_SIZE(golden_settings_polaris11_a11)); + amdgpu_device_program_register_sequence(adev, + golden_settings_polaris11_a11, + ARRAY_SIZE(golden_settings_polaris11_a11)); break; case CHIP_POLARIS10: - amdgpu_program_register_sequence(adev, - golden_settings_polaris10_a11, - ARRAY_SIZE(golden_settings_polaris10_a11)); + amdgpu_device_program_register_sequence(adev, + golden_settings_polaris10_a11, + ARRAY_SIZE(golden_settings_polaris10_a11)); break; case CHIP_CARRIZO: - amdgpu_program_register_sequence(adev, - cz_mgcg_cgcg_init, - ARRAY_SIZE(cz_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - cz_golden_settings_a11, - ARRAY_SIZE(cz_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + cz_mgcg_cgcg_init, + ARRAY_SIZE(cz_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + cz_golden_settings_a11, + ARRAY_SIZE(cz_golden_settings_a11)); break; case CHIP_STONEY: - amdgpu_program_register_sequence(adev, - stoney_mgcg_cgcg_init, - ARRAY_SIZE(stoney_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - stoney_golden_settings_a11, - ARRAY_SIZE(stoney_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + stoney_mgcg_cgcg_init, + ARRAY_SIZE(stoney_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + stoney_golden_settings_a11, + ARRAY_SIZE(stoney_golden_settings_a11)); break; default: break; @@ -355,7 +355,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 wptr; - if (ring->use_doorbell) { + if (ring->use_doorbell || ring->use_pollmem) { /* XXX check if swapping is necessary on BE */ wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2; } else { @@ -380,10 +380,13 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs]; - /* XXX check if swapping is necessary on BE */ WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); + } else if (ring->use_pollmem) { + u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs]; + + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); } else { int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; @@ -414,15 +417,13 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) */ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 vmid = vm_id & 0xf; - /* IB packet must end on a 8 DW boundary */ sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | - SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); /* base must be 32 byte aligned */ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); @@ -718,10 +719,14 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i], upper_32_bits(wptr_gpu_addr)); wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); - if (amdgpu_sriov_vf(adev)) - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); + if (ring->use_pollmem) + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_GFX_RB_WPTR_POLL_CNTL, + ENABLE, 1); else - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_GFX_RB_WPTR_POLL_CNTL, + ENABLE, 0); WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl); /* enable DMA RB */ @@ -860,7 +865,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; @@ -873,7 +878,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -899,7 +904,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) ring->idx, tmp); r = -EINVAL; } - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -922,7 +927,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) u64 gpu_addr; long r; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; @@ -974,7 +979,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1120,14 +1125,14 @@ static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * using sDMA (VI). */ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - if (vm_id < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); + if (vmid < 8) { + amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); + amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); } amdgpu_ring_write(ring, pd_addr >> 12); @@ -1135,7 +1140,7 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); /* wait for flush */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | @@ -1203,9 +1208,13 @@ static int sdma_v3_0_sw_init(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = (i == 0) ? - AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; + if (!amdgpu_sriov_vf(adev)) { + ring->use_doorbell = true; + ring->doorbell_index = (i == 0) ? + AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; + } else { + ring->use_pollmem = true; + } sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 4c55f21e37a8b78172b51e7cf2d899f521c61d48..e92fb372bc99738dad957334ba40d3a138c57636 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -27,7 +27,6 @@ #include "amdgpu_ucode.h" #include "amdgpu_trace.h" -#include "soc15ip.h" #include "sdma0/sdma0_4_0_offset.h" #include "sdma0/sdma0_4_0_sh_mask.h" #include "sdma1/sdma1_4_0_offset.h" @@ -53,95 +52,83 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); -static const u32 golden_settings_sdma_4[] = { - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831d07, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0x003ff006, 0x0003c000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CHICKEN_BITS), 0xfe931f07, 0x02831f07, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), 0xffffffff, 0x3f000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), 0x003ff000, 0x0003c000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_UTCL1_PAGE), 0x000003ff, 0x000003c0 +static const struct soc15_reg_golden golden_settings_sdma_4[] = { + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xff000ff0, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003ff006, 0x0003c000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_POWER_CNTL, 0x003ff000, 0x0003c000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0) }; -static const u32 golden_settings_sdma_vg10[] = { - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002 +static const struct soc15_reg_golden golden_settings_sdma_vg10[] = { + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002) }; -static const u32 golden_settings_sdma_4_1[] = +static const struct soc15_reg_golden golden_settings_sdma_4_1[] = { - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831d07, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xffffffff, 0x3f000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0111, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0xfc3fffff, 0x40000051, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0111, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0111, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0 + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0xfc3fffff, 0x40000051), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0) }; -static const u32 golden_settings_sdma_rv1[] = +static const struct soc15_reg_golden golden_settings_sdma_rv1[] = { - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00000002, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00000002 + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002) }; -static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset) +static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, + u32 instance, u32 offset) { - u32 base = 0; - - switch (instance) { - case 0: - base = SDMA0_BASE.instance[0].segment[0]; - break; - case 1: - base = SDMA1_BASE.instance[0].segment[0]; - break; - default: - BUG(); - break; - } - - return base + internal_offset; + return ( 0 == instance ? (adev->reg_offset[SDMA0_HWIP][0][0] + offset) : + (adev->reg_offset[SDMA1_HWIP][0][0] + offset)); } static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_sdma_4, ARRAY_SIZE(golden_settings_sdma_4)); - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_sdma_vg10, ARRAY_SIZE(golden_settings_sdma_vg10)); break; case CHIP_RAVEN: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_sdma_4_1, ARRAY_SIZE(golden_settings_sdma_4_1)); - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_sdma_rv1, ARRAY_SIZE(golden_settings_sdma_rv1)); break; @@ -265,8 +252,8 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; wptr = &local_wptr; - lowbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR)) >> 2; - highbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; + lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2; + highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", me, highbit, lowbit); @@ -315,8 +302,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) lower_32_bits(ring->wptr << 2), me, upper_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); } } @@ -343,15 +330,13 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) */ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 vmid = vm_id & 0xf; - /* IB packet must end on a 8 DW boundary */ sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | - SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); /* base must be 32 byte aligned */ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); @@ -370,13 +355,9 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, */ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; u32 ref_and_mask = 0; - const struct nbio_hdp_flush_reg *nbio_hf_reg; - - if (ring->adev->flags & AMD_IS_APU) - nbio_hf_reg = &nbio_v7_0_hdp_flush_reg; - else - nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; if (ring == &ring->adev->sdma.instance[0].ring) ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; @@ -386,8 +367,8 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, nbio_hf_reg->hdp_flush_done_offset << 2); - amdgpu_ring_write(ring, nbio_hf_reg->hdp_flush_req_offset << 2); + amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2); amdgpu_ring_write(ring, ref_and_mask); /* reference */ amdgpu_ring_write(ring, ref_and_mask); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | @@ -396,6 +377,8 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) static void sdma_v4_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE)); @@ -460,12 +443,12 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); for (i = 0; i < adev->sdma.num_instances; i++) { - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL)); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); } sdma0->ready = false; @@ -522,18 +505,18 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL)); + f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, enable ? 1 : 0); if (enable && amdgpu_sdma_phase_quantum) { - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE0_QUANTUM), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), phase_quantum); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE1_QUANTUM), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), phase_quantum); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE2_QUANTUM), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), phase_quantum); } - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), f32_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); } } @@ -557,9 +540,9 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL)); + f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), f32_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); } } @@ -587,48 +570,48 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) ring = &adev->sdma.instance[i].ring; wb_offset = (ring->rptr_offs * 4); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); /* Set ring buffer size in dwords */ rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_SWAP_ENABLE, 1); #endif - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); /* Initialize the ring buffer's read and write pointers */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR), 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_HI), 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); /* set the wb address whether it's enabled or not */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); ring->wptr = 0; /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); } - doorbell = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL)); - doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET)); + doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); + doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); if (ring->use_doorbell) { doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); @@ -637,55 +620,53 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) } else { doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); } - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL), doorbell); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); - if (adev->flags & AMD_IS_APU) - nbio_v7_0_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); - else - nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); + adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index); if (amdgpu_sriov_vf(adev)) sdma_v4_0_ring_set_wptr(ring); /* set minor_ptr_update to 0 after wptr programed */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); /* set utc l1 enable flag always to 1 */ - temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL)); + temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), temp); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); if (!amdgpu_sriov_vf(adev)) { /* unhalt engine */ - temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL)); + temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), temp); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); } /* setup the wptr shadow polling */ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr)); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr)); - wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); + wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); if (amdgpu_sriov_vf(adev)) wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); else wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); /* enable DMA RB */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); #ifdef __BIG_ENDIAN ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); #endif /* enable DMA IBs */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); ring->ready = true; @@ -816,12 +797,12 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) (adev->sdma.instance[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0); for (j = 0; j < fw_size; j++) - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); } return 0; @@ -886,7 +867,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; @@ -899,7 +880,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -925,7 +906,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) ring->idx, tmp); r = -EINVAL; } - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -948,7 +929,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) u32 tmp = 0; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; @@ -1000,7 +981,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1152,23 +1133,24 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * using sDMA (VEGA10). */ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vmid * 2); amdgpu_ring_write(ring, upper_32_bits(pd_addr)); /* flush TLB */ @@ -1183,8 +1165,8 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vm_id); /* reference */ - amdgpu_ring_write(ring, 1 << vm_id); /* mask */ + amdgpu_ring_write(ring, 1 << vmid); /* reference */ + amdgpu_ring_write(ring, 1 << vmid); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); } @@ -1317,7 +1299,7 @@ static bool sdma_v4_0_is_idle(void *handle) u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { - u32 tmp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_STATUS_REG)); + u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) return false; @@ -1333,8 +1315,8 @@ static int sdma_v4_0_wait_for_idle(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - sdma0 = RREG32(sdma_v4_0_get_reg_offset(0, mmSDMA0_STATUS_REG)); - sdma1 = RREG32(sdma_v4_0_get_reg_offset(1, mmSDMA0_STATUS_REG)); + sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); + sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) return 0; @@ -1358,8 +1340,8 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ? - sdma_v4_0_get_reg_offset(0, mmSDMA0_CNTL) : - sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL); + sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) : + sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL); sdma_cntl = RREG32(reg_offset); sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 49eef3090f085013e51ba6fcafcf75cb7ee0eecc..543101d5a5edd053c83beea6de7db5adbf9844eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1390,65 +1390,65 @@ static void si_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_TAHITI: - amdgpu_program_register_sequence(adev, - tahiti_golden_registers, - ARRAY_SIZE(tahiti_golden_registers)); - amdgpu_program_register_sequence(adev, - tahiti_golden_rlc_registers, - ARRAY_SIZE(tahiti_golden_rlc_registers)); - amdgpu_program_register_sequence(adev, - tahiti_mgcg_cgcg_init, - ARRAY_SIZE(tahiti_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - tahiti_golden_registers2, - ARRAY_SIZE(tahiti_golden_registers2)); + amdgpu_device_program_register_sequence(adev, + tahiti_golden_registers, + ARRAY_SIZE(tahiti_golden_registers)); + amdgpu_device_program_register_sequence(adev, + tahiti_golden_rlc_registers, + ARRAY_SIZE(tahiti_golden_rlc_registers)); + amdgpu_device_program_register_sequence(adev, + tahiti_mgcg_cgcg_init, + ARRAY_SIZE(tahiti_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + tahiti_golden_registers2, + ARRAY_SIZE(tahiti_golden_registers2)); break; case CHIP_PITCAIRN: - amdgpu_program_register_sequence(adev, - pitcairn_golden_registers, - ARRAY_SIZE(pitcairn_golden_registers)); - amdgpu_program_register_sequence(adev, - pitcairn_golden_rlc_registers, - ARRAY_SIZE(pitcairn_golden_rlc_registers)); - amdgpu_program_register_sequence(adev, - pitcairn_mgcg_cgcg_init, - ARRAY_SIZE(pitcairn_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + pitcairn_golden_registers, + ARRAY_SIZE(pitcairn_golden_registers)); + amdgpu_device_program_register_sequence(adev, + pitcairn_golden_rlc_registers, + ARRAY_SIZE(pitcairn_golden_rlc_registers)); + amdgpu_device_program_register_sequence(adev, + pitcairn_mgcg_cgcg_init, + ARRAY_SIZE(pitcairn_mgcg_cgcg_init)); break; case CHIP_VERDE: - amdgpu_program_register_sequence(adev, - verde_golden_registers, - ARRAY_SIZE(verde_golden_registers)); - amdgpu_program_register_sequence(adev, - verde_golden_rlc_registers, - ARRAY_SIZE(verde_golden_rlc_registers)); - amdgpu_program_register_sequence(adev, - verde_mgcg_cgcg_init, - ARRAY_SIZE(verde_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - verde_pg_init, - ARRAY_SIZE(verde_pg_init)); + amdgpu_device_program_register_sequence(adev, + verde_golden_registers, + ARRAY_SIZE(verde_golden_registers)); + amdgpu_device_program_register_sequence(adev, + verde_golden_rlc_registers, + ARRAY_SIZE(verde_golden_rlc_registers)); + amdgpu_device_program_register_sequence(adev, + verde_mgcg_cgcg_init, + ARRAY_SIZE(verde_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + verde_pg_init, + ARRAY_SIZE(verde_pg_init)); break; case CHIP_OLAND: - amdgpu_program_register_sequence(adev, - oland_golden_registers, - ARRAY_SIZE(oland_golden_registers)); - amdgpu_program_register_sequence(adev, - oland_golden_rlc_registers, - ARRAY_SIZE(oland_golden_rlc_registers)); - amdgpu_program_register_sequence(adev, - oland_mgcg_cgcg_init, - ARRAY_SIZE(oland_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + oland_golden_registers, + ARRAY_SIZE(oland_golden_registers)); + amdgpu_device_program_register_sequence(adev, + oland_golden_rlc_registers, + ARRAY_SIZE(oland_golden_rlc_registers)); + amdgpu_device_program_register_sequence(adev, + oland_mgcg_cgcg_init, + ARRAY_SIZE(oland_mgcg_cgcg_init)); break; case CHIP_HAINAN: - amdgpu_program_register_sequence(adev, - hainan_golden_registers, - ARRAY_SIZE(hainan_golden_registers)); - amdgpu_program_register_sequence(adev, - hainan_golden_registers2, - ARRAY_SIZE(hainan_golden_registers2)); - amdgpu_program_register_sequence(adev, - hainan_mgcg_cgcg_init, - ARRAY_SIZE(hainan_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + hainan_golden_registers, + ARRAY_SIZE(hainan_golden_registers)); + amdgpu_device_program_register_sequence(adev, + hainan_golden_registers2, + ARRAY_SIZE(hainan_golden_registers2)); + amdgpu_device_program_register_sequence(adev, + hainan_mgcg_cgcg_init, + ARRAY_SIZE(hainan_mgcg_cgcg_init)); break; @@ -1959,42 +1959,42 @@ int si_set_ip_blocks(struct amdgpu_device *adev) case CHIP_VERDE: case CHIP_TAHITI: case CHIP_PITCAIRN: - amdgpu_ip_block_add(adev, &si_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v6_0_ip_block); - amdgpu_ip_block_add(adev, &si_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &si_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); else - amdgpu_ip_block_add(adev, &dce_v6_0_ip_block); - amdgpu_ip_block_add(adev, &gfx_v6_0_ip_block); - amdgpu_ip_block_add(adev, &si_dma_ip_block); - /* amdgpu_ip_block_add(adev, &uvd_v3_1_ip_block); */ - /* amdgpu_ip_block_add(adev, &vce_v1_0_ip_block); */ + amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_dma_ip_block); + /* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */ + /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */ break; case CHIP_OLAND: - amdgpu_ip_block_add(adev, &si_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v6_0_ip_block); - amdgpu_ip_block_add(adev, &si_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &si_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); else - amdgpu_ip_block_add(adev, &dce_v6_4_ip_block); - amdgpu_ip_block_add(adev, &gfx_v6_0_ip_block); - amdgpu_ip_block_add(adev, &si_dma_ip_block); - /* amdgpu_ip_block_add(adev, &uvd_v3_1_ip_block); */ - /* amdgpu_ip_block_add(adev, &vce_v1_0_ip_block); */ + amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_dma_ip_block); + /* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */ + /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */ break; case CHIP_HAINAN: - amdgpu_ip_block_add(adev, &si_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v6_0_ip_block); - amdgpu_ip_block_add(adev, &si_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &si_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); - amdgpu_ip_block_add(adev, &gfx_v6_0_ip_block); - amdgpu_ip_block_add(adev, &si_dma_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_dma_ip_block); break; default: BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index ee469a906cd371f37ebf1cad85c51afd567584ee..9a29c13990918116a2bbfdaca9a9043f757dbc3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -61,14 +61,14 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. * Pad as necessary with NOPs. */ while ((lower_32_bits(ring->wptr) & 7) != 5) amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); - amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); + amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0)); amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); @@ -221,7 +221,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; @@ -234,7 +234,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 4); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -258,7 +258,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) ring->idx, tmp); r = -EINVAL; } - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -281,7 +281,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) u64 gpu_addr; long r; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; @@ -328,7 +328,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -473,25 +473,25 @@ static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * using sDMA (VI). */ static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - if (vm_id < 8) - amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); + if (vmid < 8) + amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); else - amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8))); + amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8))); amdgpu_ring_write(ring, pd_addr >> 12); /* bits 0-7 are the VM contexts0-7 */ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); amdgpu_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST)); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); /* wait for invalidate to complete */ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST); amdgpu_ring_write(ring, 0xff << 16); /* retry */ - amdgpu_ring_write(ring, 1 << vm_id); /* mask */ + amdgpu_ring_write(ring, 1 << vmid); /* mask */ amdgpu_ring_write(ring, 0); /* value */ amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ } diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 299cb3161b2cc641751798895c0d3bb8b9616f11..ce675a7f179a6a0f3b9f7b254589694541aea3cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3464,6 +3464,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, (adev->pdev->device == 0x6667)) { max_sclk = 75000; } + if ((adev->pdev->revision == 0xC3) || + (adev->pdev->device == 0x6665)) { + max_sclk = 60000; + max_mclk = 80000; + } } else if (adev->asic_type == CHIP_OLAND) { if ((adev->pdev->revision == 0xC7) || (adev->pdev->revision == 0x80) || diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index d2c6b80309c8db3560968bc80577a0628ee675b0..60dad63098a2aa4db47de4804174a992fb78039e 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -146,7 +146,7 @@ static void si_ih_decode_iv(struct amdgpu_device *adev, entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; - entry->vm_id = (dw[2] >> 8) & 0xff; + entry->vmid = (dw[2] >> 8) & 0xff; adev->irq.ih.rptr += 16; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index f134ca0c093cf8f33d600396c0045fcf2df3e5c3..a04a033f57de0121e7da812f66d8c8978fc81e32 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -34,7 +34,6 @@ #include "atom.h" #include "amd_pcie.h" -#include "soc15ip.h" #include "uvd/uvd_7_0_offset.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" @@ -101,15 +100,8 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; u32 r; - const struct nbio_pcie_index_data *nbio_pcie_id; - - if (adev->flags & AMD_IS_APU) - nbio_pcie_id = &nbio_v7_0_pcie_index_data; - else - nbio_pcie_id = &nbio_v6_1_pcie_index_data; - - address = nbio_pcie_id->index_offset; - data = nbio_pcie_id->data_offset; + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -122,15 +114,9 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags, address, data; - const struct nbio_pcie_index_data *nbio_pcie_id; - if (adev->flags & AMD_IS_APU) - nbio_pcie_id = &nbio_v7_0_pcie_index_data; - else - nbio_pcie_id = &nbio_v6_1_pcie_index_data; - - address = nbio_pcie_id->index_offset; - data = nbio_pcie_id->data_offset; + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -242,41 +228,9 @@ static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static u32 soc15_get_config_memsize(struct amdgpu_device *adev) { - if (adev->flags & AMD_IS_APU) - return nbio_v7_0_get_memsize(adev); - else - return nbio_v6_1_get_memsize(adev); + return adev->nbio_funcs->get_memsize(adev); } -static const u32 vega10_golden_init[] = -{ -}; - -static const u32 raven_golden_init[] = -{ -}; - -static void soc15_init_golden_registers(struct amdgpu_device *adev) -{ - /* Some of the registers might be dependent on GRBM_GFX_INDEX */ - mutex_lock(&adev->grbm_idx_mutex); - - switch (adev->asic_type) { - case CHIP_VEGA10: - amdgpu_program_register_sequence(adev, - vega10_golden_init, - ARRAY_SIZE(vega10_golden_init)); - break; - case CHIP_RAVEN: - amdgpu_program_register_sequence(adev, - raven_golden_init, - ARRAY_SIZE(raven_golden_init)); - break; - default: - break; - } - mutex_unlock(&adev->grbm_idx_mutex); -} static u32 soc15_get_xclk(struct amdgpu_device *adev) { return adev->clock.spll.reference_freq; @@ -332,25 +286,34 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, return true; } -static struct amdgpu_allowed_register_entry soc15_allowed_read_registers[] = { - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0)}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1)}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2)}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3)}, - { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_STATUS_REG)}, - { SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_STATUS_REG)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STAT)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS)}, - { SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)}, +struct soc15_allowed_register_entry { + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + bool grbm_indexed; +}; + + +static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = { + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE0)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE1)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE2)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE3)}, + { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_STATUS_REG)}, + { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_STATUS_REG)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STAT)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT2)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT3)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, }; static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, @@ -377,12 +340,9 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev, if (indexed) { return soc15_read_indexed_register(adev, se_num, sh_num, reg_offset); } else { - switch (reg_offset) { - case SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG): + if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)) return adev->gfx.config.gb_addr_config; - default: - return RREG32(reg_offset); - } + return RREG32(reg_offset); } } @@ -390,10 +350,13 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 reg_offset, u32 *value) { uint32_t i; + struct soc15_allowed_register_entry *en; *value = 0; for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) { - if (reg_offset != soc15_allowed_read_registers[i].reg_offset) + en = &soc15_allowed_read_registers[i]; + if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] + + en->reg_offset)) continue; *value = soc15_get_register_value(adev, @@ -404,6 +367,43 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num, return -EINVAL; } + +/** + * soc15_program_register_sequence - program an array of registers. + * + * @adev: amdgpu_device pointer + * @regs: pointer to the register array + * @array_size: size of the register array + * + * Programs an array or registers with and and or masks. + * This is a helper for setting golden registers. + */ + +void soc15_program_register_sequence(struct amdgpu_device *adev, + const struct soc15_reg_golden *regs, + const u32 array_size) +{ + const struct soc15_reg_golden *entry; + u32 tmp, reg; + int i; + + for (i = 0; i < array_size; ++i) { + entry = ®s[i]; + reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; + + if (entry->and_mask == 0xffffffff) { + tmp = entry->or_mask; + } else { + tmp = RREG32(reg); + tmp &= ~(entry->and_mask); + tmp |= entry->or_mask; + } + WREG32(reg, tmp); + } + +} + + static int soc15_asic_reset(struct amdgpu_device *adev) { u32 i; @@ -428,9 +428,8 @@ static int soc15_asic_reset(struct amdgpu_device *adev) /* wait for asic to come out of reset */ for (i = 0; i < adev->usec_timeout; i++) { - u32 memsize = (adev->flags & AMD_IS_APU) ? - nbio_v7_0_get_memsize(adev) : - nbio_v6_1_get_memsize(adev); + u32 memsize = adev->nbio_funcs->get_memsize(adev); + if (memsize != 0xffffffff) break; udelay(1); @@ -495,14 +494,10 @@ static void soc15_program_aspm(struct amdgpu_device *adev) } static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) + bool enable) { - if (adev->flags & AMD_IS_APU) { - nbio_v7_0_enable_doorbell_aperture(adev, enable); - } else { - nbio_v6_1_enable_doorbell_aperture(adev, enable); - nbio_v6_1_enable_doorbell_selfring_aperture(adev, enable); - } + adev->nbio_funcs->enable_doorbell_aperture(adev, enable); + adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable); } static const struct amdgpu_ip_block_version vega10_common_ip_block = @@ -516,50 +511,65 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block = int soc15_set_ip_blocks(struct amdgpu_device *adev) { - nbio_v6_1_detect_hw_virt(adev); + /* Set IP register base before any HW register access */ + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_RAVEN: + vega10_reg_base_init(adev); + break; + default: + return -EINVAL; + } + + if (adev->flags & AMD_IS_APU) + adev->nbio_funcs = &nbio_v7_0_funcs; + else + adev->nbio_funcs = &nbio_v6_1_funcs; + + adev->nbio_funcs->detect_hw_virt(adev); if (amdgpu_sriov_vf(adev)) adev->virt.ops = &xgpu_ai_virt_ops; switch (adev->asic_type) { case CHIP_VEGA10: - amdgpu_ip_block_add(adev, &vega10_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block); - amdgpu_ip_block_add(adev, &vega10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1) - amdgpu_ip_block_add(adev, &psp_v3_1_ip_block); + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); if (!amdgpu_sriov_vf(adev)) - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #else # warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." #endif - amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block); - amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block); - amdgpu_ip_block_add(adev, &vce_v4_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block); break; case CHIP_RAVEN: - amdgpu_ip_block_add(adev, &vega10_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block); - amdgpu_ip_block_add(adev, &vega10_ih_ip_block); - amdgpu_ip_block_add(adev, &psp_v10_0_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #else # warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." #endif - amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block); - amdgpu_ip_block_add(adev, &vcn_v1_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block); break; default: return -EINVAL; @@ -570,10 +580,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) { - if (adev->flags & AMD_IS_APU) - return nbio_v7_0_get_rev_id(adev); - else - return nbio_v6_1_get_rev_id(adev); + return adev->nbio_funcs->get_rev_id(adev); } static const struct amdgpu_asic_funcs soc15_asic_funcs = @@ -609,8 +616,8 @@ static int soc15_common_early_init(void *handle) adev->asic_funcs = &soc15_asic_funcs; - if (amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) && - (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) && + (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) psp_enabled = true; adev->rev_id = soc15_get_rev_id(adev); @@ -659,8 +666,8 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_SDMA_MGCG | AMD_CG_SUPPORT_SDMA_LS; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | - AMD_PG_SUPPORT_MMHUB; + adev->pg_flags = AMD_PG_SUPPORT_SDMA; + adev->external_rev_id = 0x1; break; default: @@ -675,7 +682,7 @@ static int soc15_common_early_init(void *handle) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - amdgpu_get_pcie_info(adev); + amdgpu_device_get_pcie_info(adev); return 0; } @@ -709,15 +716,12 @@ static int soc15_common_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* move the golden regs per IP block */ - soc15_init_golden_registers(adev); /* enable pcie gen2/3 link */ soc15_pcie_gen3_enable(adev); /* enable aspm */ soc15_program_aspm(adev); /* setup nbio registers */ - if (!(adev->flags & AMD_IS_APU)) - nbio_v6_1_init_registers(adev); + adev->nbio_funcs->init_registers(adev); /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); @@ -878,9 +882,9 @@ static int soc15_common_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_VEGA10: - nbio_v6_1_update_medium_grain_clock_gating(adev, + adev->nbio_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - nbio_v6_1_update_medium_grain_light_sleep(adev, + adev->nbio_funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); soc15_update_hdp_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -894,9 +898,9 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE ? true : false); break; case CHIP_RAVEN: - nbio_v7_0_update_medium_grain_clock_gating(adev, + adev->nbio_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - nbio_v6_1_update_medium_grain_light_sleep(adev, + adev->nbio_funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); soc15_update_hdp_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -921,7 +925,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - nbio_v6_1_get_clockgating_state(adev, flags); + adev->nbio_funcs->get_clockgating_state(adev, flags); /* AMD_CG_SUPPORT_HDP_LS */ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index acb3cdb119f24c2daa6ee23a97a1f4b8e7365b54..26b3feac5d062328d253e69ef212721447463813 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -29,8 +29,28 @@ extern const struct amd_ip_funcs soc15_common_ip_funcs; +struct soc15_reg_golden { + u32 hwip; + u32 instance; + u32 segment; + u32 reg; + u32 and_mask; + u32 or_mask; +}; + +#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg + +#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ + { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } + void soc15_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int soc15_set_ip_blocks(struct amdgpu_device *adev); +void soc15_program_register_sequence(struct amdgpu_device *adev, + const struct soc15_reg_golden *registers, + const u32 array_size); + +int vega10_reg_base_init(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 7a8e4e28abb2a9bf3b68933f80c21596920c4781..def865067edd2b2cadc820824442b0006b057b2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -24,72 +24,28 @@ #ifndef __SOC15_COMMON_H__ #define __SOC15_COMMON_H__ -struct nbio_hdp_flush_reg { - u32 hdp_flush_req_offset; - u32 hdp_flush_done_offset; - u32 ref_and_mask_cp0; - u32 ref_and_mask_cp1; - u32 ref_and_mask_cp2; - u32 ref_and_mask_cp3; - u32 ref_and_mask_cp4; - u32 ref_and_mask_cp5; - u32 ref_and_mask_cp6; - u32 ref_and_mask_cp7; - u32 ref_and_mask_cp8; - u32 ref_and_mask_cp9; - u32 ref_and_mask_sdma0; - u32 ref_and_mask_sdma1; -}; - -struct nbio_pcie_index_data { - u32 index_offset; - u32 data_offset; -}; - /* Register Access Macros */ -#define SOC15_REG_OFFSET(ip, inst, reg) (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ - (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ - (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ - (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ - (ip##_BASE__INST##inst##_SEG4 + reg))))) +#define SOC15_REG_OFFSET(ip, inst, reg) (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) #define WREG32_FIELD15(ip, idx, reg, field, val) \ - WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) + WREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \ + (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \ + & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) #define RREG32_SOC15(ip, inst, reg) \ - RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ - (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ - (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ - (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ - (ip##_BASE__INST##inst##_SEG4 + reg)))))) + RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) #define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \ - RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ - (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ - (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ - (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ - (ip##_BASE__INST##inst##_SEG4 + reg))))) + offset) + RREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset) #define WREG32_SOC15(ip, inst, reg, value) \ - WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ - (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ - (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ - (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ - (ip##_BASE__INST##inst##_SEG4 + reg))))), value) + WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value) #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \ - WREG32_NO_KIQ( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ - (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ - (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ - (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ - (ip##_BASE__INST##inst##_SEG4 + reg))))), value) + WREG32_NO_KIQ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value) #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ - WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ - (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ - (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ - (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ - (ip##_BASE__INST##inst##_SEG4 + reg))))) + offset, value) + WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index aa4e320e31f8b82178aaa7d2ed21c74b03209149..5995ffc183de0612c1f64fd6aa76e6874b71efdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -270,7 +270,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev, entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; - entry->vm_id = (dw[2] >> 8) & 0xff; + entry->vmid = (dw[2] >> 8) & 0xff; entry->pas_id = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index b13ae34be1c2f6d0466ffe905be74472a1faaed5..8ab10c220910c6a176ff7fe36e1c945eda3ecfe0 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -541,7 +541,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) */ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); amdgpu_ring_write(ring, ib->gpu_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index a4b0f1d842b734aedd2c74f3361bdbbd24879889..c1fe30cdba32021532123fa2174ac89ee745436e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -556,7 +556,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) */ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 0e8b887cf03e78ddb236d590ad48bcdda61f9180..b2bfedaf57f197b38e98dce034d12c1d3359ca64 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -37,6 +37,9 @@ #include "gmc/gmc_8_1_d.h" #include "vi.h" +/* Polaris10/11/12 firmware version */ +#define FW_1_130_16 ((1 << 24) | (130 << 16) | (16 << 8)) + static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev); @@ -58,7 +61,9 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, */ static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev) { - return ((adev->asic_type >= CHIP_POLARIS10) && (adev->asic_type <= CHIP_POLARIS12)); + return ((adev->asic_type >= CHIP_POLARIS10) && + (adev->asic_type <= CHIP_POLARIS12) && + (!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16)); } /** @@ -411,11 +416,19 @@ static int uvd_v6_0_sw_init(void *handle) if (r) return r; - if (uvd_v6_0_enc_support(adev)) { - struct amd_sched_rq *rq; + if (!uvd_v6_0_enc_support(adev)) { + for (i = 0; i < adev->uvd.num_enc_rings; ++i) + adev->uvd.ring_enc[i].funcs = NULL; + + adev->uvd.irq.num_types = 1; + adev->uvd.num_enc_rings = 0; + + DRM_INFO("UVD ENC is disabled\n"); + } else { + struct drm_sched_rq *rq; ring = &adev->uvd.ring_enc[0]; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, rq, amdgpu_sched_jobs, NULL); if (r) { DRM_ERROR("Failed setting up UVD ENC run queue.\n"); @@ -456,7 +469,7 @@ static int uvd_v6_0_sw_fini(void *handle) return r; if (uvd_v6_0_enc_support(adev)) { - amd_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); + drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); for (i = 0; i < adev->uvd.num_enc_rings; ++i) amdgpu_ring_fini(&adev->uvd.ring_enc[i]); @@ -1028,10 +1041,10 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) */ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0)); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); @@ -1050,24 +1063,24 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, * Write enc ring commands to execute the indirect buffer */ static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch) + struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) { amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, ib->length_dw); } static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { uint32_t reg; - if (vm_id < 8) - reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id; + if (vmid < 8) + reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid; else - reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8; + reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8; amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); amdgpu_ring_write(ring, reg << 2); @@ -1079,7 +1092,7 @@ static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0)); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0)); amdgpu_ring_write(ring, 0x8); @@ -1088,7 +1101,7 @@ static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8, 0)); - amdgpu_ring_write(ring, 1 << vm_id); /* mask */ + amdgpu_ring_write(ring, 1 << vmid); /* mask */ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0)); amdgpu_ring_write(ring, 0xC); } @@ -1127,14 +1140,14 @@ static void uvd_v6_0_enc_ring_insert_end(struct amdgpu_ring *ring) } static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vm_id, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { amdgpu_ring_write(ring, HEVC_ENC_CMD_UPDATE_PTB); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, pd_addr >> 12); amdgpu_ring_write(ring, HEVC_ENC_CMD_FLUSH_TLB); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); } static bool uvd_v6_0_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 660fa41dc877aa6c920184532c6e212fe6077bfb..6b95f4f344b5315cb295a28a0868f008b8652acb 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -29,7 +29,6 @@ #include "soc15_common.h" #include "mmsch_v1_0.h" -#include "soc15ip.h" #include "uvd/uvd_7_0_offset.h" #include "uvd/uvd_7_0_sh_mask.h" #include "vce/vce_4_0_offset.h" @@ -385,7 +384,7 @@ static int uvd_v7_0_early_init(void *handle) static int uvd_v7_0_sw_init(void *handle) { struct amdgpu_ring *ring; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; int i, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -416,8 +415,8 @@ static int uvd_v7_0_sw_init(void *handle) } ring = &adev->uvd.ring_enc[0]; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, rq, amdgpu_sched_jobs, NULL); if (r) { DRM_ERROR("Failed setting up UVD ENC run queue.\n"); @@ -472,7 +471,7 @@ static int uvd_v7_0_sw_fini(void *handle) if (r) return r; - amd_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); + drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); for (i = 0; i < adev->uvd.num_enc_rings; ++i) amdgpu_ring_fini(&adev->uvd.ring_enc[i]); @@ -1086,6 +1085,8 @@ static void uvd_v7_0_stop(struct amdgpu_device *adev) static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { + struct amdgpu_device *adev = ring->adev; + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); amdgpu_ring_write(ring, @@ -1123,6 +1124,7 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); amdgpu_ring_write(ring, HEVC_ENC_CMD_FENCE); @@ -1141,6 +1143,8 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, */ static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(NBIF, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0)); amdgpu_ring_write(ring, 0); @@ -1155,6 +1159,8 @@ static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) */ static void uvd_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0)); amdgpu_ring_write(ring, 1); } @@ -1212,11 +1218,13 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) */ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); @@ -1238,10 +1246,10 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, * Write enc ring commands to execute the indirect buffer */ static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch) + struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) { amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, ib->length_dw); @@ -1250,6 +1258,8 @@ static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring, static void uvd_v7_0_vm_reg_write(struct amdgpu_ring *ring, uint32_t data0, uint32_t data1) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, data0); @@ -1264,6 +1274,8 @@ static void uvd_v7_0_vm_reg_write(struct amdgpu_ring *ring, static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring, uint32_t data0, uint32_t data1, uint32_t mask) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, data0); @@ -1279,25 +1291,26 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring, } static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - uint32_t data0, data1, mask; + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; + uint32_t data0, data1, mask; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; - data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; + data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2; data1 = upper_32_bits(pd_addr); uvd_v7_0_vm_reg_write(ring, data0, data1); - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; data1 = lower_32_bits(pd_addr); uvd_v7_0_vm_reg_write(ring, data0, data1); - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); @@ -1309,36 +1322,47 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* wait for flush */ data0 = (hub->vm_inv_eng0_ack + eng) << 2; - data1 = 1 << vm_id; - mask = 1 << vm_id; + data1 = 1 << vmid; + mask = 1 << vmid; uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); } +static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + struct amdgpu_device *adev = ring->adev; + + for (i = 0; i < count; i++) + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); + +} + static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, HEVC_ENC_CMD_END); } static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vm_id, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); amdgpu_ring_write(ring, upper_32_bits(pd_addr)); amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); amdgpu_ring_write(ring, 0xffffffff); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); @@ -1350,8 +1374,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, /* wait for flush */ amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vm_id); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); + amdgpu_ring_write(ring, 1 << vmid); } #if 0 @@ -1681,7 +1705,7 @@ const struct amd_ip_funcs uvd_v7_0_ip_funcs = { static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, - .nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0), + .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_ring_get_rptr, @@ -1700,7 +1724,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .emit_hdp_invalidate = uvd_v7_0_ring_emit_hdp_invalidate, .test_ring = uvd_v7_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = uvd_v7_0_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index cf81065e3c5ab95030f24f4a883924f27c71bc90..a5355eb689f145a81ec8296d8cfdcc27600b0012 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -834,24 +834,24 @@ static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags) } static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch) + struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) { amdgpu_ring_write(ring, VCE_CMD_IB_VM); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, ib->length_dw); } static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vm_id, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, pd_addr >> 12); amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, VCE_CMD_END); } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index f2f7136500744cc5cabf64a6814db31856e25bb2..7cf2eef68cf24d32c1dc2b50dacd78ccc58d9b3b 100755 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -32,7 +32,6 @@ #include "soc15_common.h" #include "mmsch_v1_0.h" -#include "soc15ip.h" #include "vce/vce_4_0_offset.h" #include "vce/vce_4_0_default.h" #include "vce/vce_4_0_sh_mask.h" @@ -424,7 +423,7 @@ static int vce_v4_0_sw_init(void *handle) if (r) return r; - size = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2; + size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) size += VCE_V4_0_FW_SIZE; @@ -939,10 +938,10 @@ static int vce_v4_0_set_powergating_state(void *handle, #endif static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch) + struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) { amdgpu_ring_write(ring, VCE_CMD_IB_VM); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, ib->length_dw); @@ -966,25 +965,26 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring) } static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vm_id, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); amdgpu_ring_write(ring, upper_32_bits(pd_addr)); amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); amdgpu_ring_write(ring, 0xffffffff); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); @@ -996,8 +996,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, /* wait for flush */ amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vm_id); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); + amdgpu_ring_write(ring, 1 << vmid); } static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index e4673f792545b1491bce7d4da9cb9881e469c461..b99e15c43e45cf847b4699f86e5e1c24187fe279 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -28,7 +28,6 @@ #include "soc15d.h" #include "soc15_common.h" -#include "soc15ip.h" #include "vcn/vcn_1_0_offset.h" #include "vcn/vcn_1_0_sh_mask.h" #include "hdp/hdp_4_0_offset.h" @@ -744,6 +743,8 @@ static void vcn_v1_0_dec_ring_set_wptr(struct amdgpu_ring *ring) */ static void vcn_v1_0_dec_ring_insert_start(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, 0); @@ -761,6 +762,8 @@ static void vcn_v1_0_dec_ring_insert_start(struct amdgpu_ring *ring) */ static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1); @@ -777,6 +780,8 @@ static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring) static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { + struct amdgpu_device *adev = ring->adev; + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); amdgpu_ring_write(ring, @@ -812,6 +817,8 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 */ static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0)); amdgpu_ring_write(ring, 1); } @@ -826,11 +833,13 @@ static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) */ static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); @@ -846,6 +855,8 @@ static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring, static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring, uint32_t data0, uint32_t data1) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, data0); @@ -860,6 +871,8 @@ static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring, static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring, uint32_t data0, uint32_t data1, uint32_t mask) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, data0); @@ -875,25 +888,26 @@ static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring, } static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - uint32_t data0, data1, mask; + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; + uint32_t data0, data1, mask; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; - data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; + data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2; data1 = upper_32_bits(pd_addr); vcn_v1_0_dec_vm_reg_write(ring, data0, data1); - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; data1 = lower_32_bits(pd_addr); vcn_v1_0_dec_vm_reg_write(ring, data0, data1); - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask); @@ -905,8 +919,8 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, /* wait for flush */ data0 = (hub->vm_inv_eng0_ack + eng) << 2; - data1 = 1 << vm_id; - mask = 1 << vm_id; + data1 = 1 << vmid; + mask = 1 << vmid; vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask); } @@ -997,38 +1011,39 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring) * Write enc ring commands to execute the indirect buffer */ static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch) + struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) { amdgpu_ring_write(ring, VCN_ENC_CMD_IB); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, ib->length_dw); } static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vm_id, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); amdgpu_ring_write(ring, upper_32_bits(pd_addr)); amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); amdgpu_ring_write(ring, 0xffffffff); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); @@ -1040,8 +1055,8 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, /* wait for flush */ amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vm_id); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); + amdgpu_ring_write(ring, 1 << vmid); } static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev, @@ -1077,6 +1092,17 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static void vcn_v1_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + struct amdgpu_device *adev = ring->adev; + + for (i = 0; i < count; i++) + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); + +} + + static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .name = "vcn_v1_0", .early_init = vcn_v1_0_early_init, @@ -1100,7 +1126,7 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, - .nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0), + .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .vmhub = AMDGPU_MMHUB, .get_rptr = vcn_v1_0_dec_ring_get_rptr, @@ -1118,7 +1144,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .emit_hdp_invalidate = vcn_v1_0_dec_ring_emit_hdp_invalidate, .test_ring = amdgpu_vcn_dec_ring_test_ring, .test_ib = amdgpu_vcn_dec_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = vcn_v1_0_ring_insert_nop, .insert_start = vcn_v1_0_dec_ring_insert_start, .insert_end = vcn_v1_0_dec_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index ca778cd4e6e84c1d4e0005dc5e8545207de1c787..b69ceafb78883e648609dab5927ed7049c720690 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -25,8 +25,6 @@ #include "amdgpu_ih.h" #include "soc15.h" - -#include "soc15ip.h" #include "oss/osssys_4_0_offset.h" #include "oss/osssys_4_0_sh_mask.h" @@ -97,10 +95,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) /* disable irqs */ vega10_ih_disable_interrupts(adev); - if (adev->flags & AMD_IS_APU) - nbio_v7_0_ih_control(adev); - else - nbio_v6_1_ih_control(adev); + adev->nbio_funcs->ih_control(adev); ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ @@ -151,10 +146,8 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ENABLE, 0); } WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr); - if (adev->flags & AMD_IS_APU) - nbio_v7_0_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); - else - nbio_v6_1_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); + adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->irq.ih.doorbell_index); tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, @@ -334,8 +327,8 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev, entry->client_id = dw[0] & 0xff; entry->src_id = (dw[0] >> 8) & 0xff; entry->ring_id = (dw[0] >> 16) & 0xff; - entry->vm_id = (dw[0] >> 24) & 0xf; - entry->vm_id_src = (dw[0] >> 31); + entry->vmid = (dw[0] >> 24) & 0xf; + entry->vmid_src = (dw[0] >> 31); entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); entry->timestamp_src = dw[2] >> 31; entry->pas_id = dw[3] & 0xffff; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c new file mode 100644 index 0000000000000000000000000000000000000000..b7bdd04793d69afb52aff348dc49cf369f1f7ad7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -0,0 +1,56 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "soc15.h" + +#include "soc15_common.h" +#include "soc15ip.h" + +int vega10_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the blocke beend by our driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); + adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCE_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[PWR_HWIP][i] = (uint32_t *)(&(PWR_BASE.instance[i])); + adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIF_BASE.instance[i])); + + } + return 0; +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index bb8ca9489546aed8be038d8c67dd86b388289cd0..da2b99c2d95f06953d285b0e6bbd4093e768f423 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -282,29 +282,29 @@ static void vi_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_TOPAZ: - amdgpu_program_register_sequence(adev, - iceland_mgcg_cgcg_init, - ARRAY_SIZE(iceland_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + iceland_mgcg_cgcg_init, + ARRAY_SIZE(iceland_mgcg_cgcg_init)); break; case CHIP_FIJI: - amdgpu_program_register_sequence(adev, - fiji_mgcg_cgcg_init, - ARRAY_SIZE(fiji_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + fiji_mgcg_cgcg_init, + ARRAY_SIZE(fiji_mgcg_cgcg_init)); break; case CHIP_TONGA: - amdgpu_program_register_sequence(adev, - tonga_mgcg_cgcg_init, - ARRAY_SIZE(tonga_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + tonga_mgcg_cgcg_init, + ARRAY_SIZE(tonga_mgcg_cgcg_init)); break; case CHIP_CARRIZO: - amdgpu_program_register_sequence(adev, - cz_mgcg_cgcg_init, - ARRAY_SIZE(cz_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + cz_mgcg_cgcg_init, + ARRAY_SIZE(cz_mgcg_cgcg_init)); break; case CHIP_STONEY: - amdgpu_program_register_sequence(adev, - stoney_mgcg_cgcg_init, - ARRAY_SIZE(stoney_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + stoney_mgcg_cgcg_init, + ARRAY_SIZE(stoney_mgcg_cgcg_init)); break; case CHIP_POLARIS11: case CHIP_POLARIS10: @@ -449,14 +449,18 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev, static void vi_detect_hw_virtualization(struct amdgpu_device *adev) { - uint32_t reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER); - /* bit0: 0 means pf and 1 means vf */ - /* bit31: 0 means disable IOV and 1 means enable */ - if (reg & 1) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; - - if (reg & 0x80000000) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; + uint32_t reg = 0; + + if (adev->asic_type == CHIP_TONGA || + adev->asic_type == CHIP_FIJI) { + reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER); + /* bit0: 0 means pf and 1 means vf */ + if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER)) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; + /* bit31: 0 means disable IOV and 1 means enable */ + if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, IOV_ENABLE)) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; + } if (reg == 0) { if (is_virtual_machine()) /* passthrough mode exclus sr-iov mode */ @@ -667,7 +671,7 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev) /* disable BM */ pci_clear_master(adev->pdev); /* reset */ - amdgpu_pci_config_reset(adev); + amdgpu_device_pci_config_reset(adev); udelay(100); @@ -891,8 +895,8 @@ static int vi_common_early_init(void *handle) adev->asic_funcs = &vi_asic_funcs; - if (amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SMC) && - (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC))) + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SMC) && + (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC))) smc_enabled = true; adev->rev_id = vi_get_rev_id(adev); @@ -1074,7 +1078,7 @@ static int vi_common_early_init(void *handle) /* vi use smc load by default */ adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - amdgpu_get_pcie_info(adev); + amdgpu_device_get_pcie_info(adev); return 0; } @@ -1487,115 +1491,115 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_TOPAZ: /* topaz has no DCE, UVD, VCE */ - amdgpu_ip_block_add(adev, &vi_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v7_4_ip_block); - amdgpu_ip_block_add(adev, &iceland_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vi_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v7_4_ip_block); + amdgpu_device_ip_block_add(adev, &iceland_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); - amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v2_4_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v2_4_ip_block); break; case CHIP_FIJI: - amdgpu_ip_block_add(adev, &vi_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v8_5_ip_block); - amdgpu_ip_block_add(adev, &tonga_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vi_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v8_5_ip_block); + amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v10_1_ip_block); - amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v10_1_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); if (!amdgpu_sriov_vf(adev)) { - amdgpu_ip_block_add(adev, &uvd_v6_0_ip_block); - amdgpu_ip_block_add(adev, &vce_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block); } break; case CHIP_TONGA: - amdgpu_ip_block_add(adev, &vi_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v8_0_ip_block); - amdgpu_ip_block_add(adev, &tonga_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vi_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v10_0_ip_block); - amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); if (!amdgpu_sriov_vf(adev)) { - amdgpu_ip_block_add(adev, &uvd_v5_0_ip_block); - amdgpu_ip_block_add(adev, &vce_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v5_0_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block); } break; case CHIP_POLARIS11: case CHIP_POLARIS10: case CHIP_POLARIS12: - amdgpu_ip_block_add(adev, &vi_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v8_1_ip_block); - amdgpu_ip_block_add(adev, &tonga_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vi_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block); + amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v11_2_ip_block); - amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v3_1_ip_block); - amdgpu_ip_block_add(adev, &uvd_v6_3_ip_block); - amdgpu_ip_block_add(adev, &vce_v3_4_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v11_2_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_1_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v6_3_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block); break; case CHIP_CARRIZO: - amdgpu_ip_block_add(adev, &vi_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v8_0_ip_block); - amdgpu_ip_block_add(adev, &cz_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vi_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &cz_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v11_0_ip_block); - amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v3_0_ip_block); - amdgpu_ip_block_add(adev, &uvd_v6_0_ip_block); - amdgpu_ip_block_add(adev, &vce_v3_1_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v3_1_ip_block); #if defined(CONFIG_DRM_AMD_ACP) - amdgpu_ip_block_add(adev, &acp_ip_block); + amdgpu_device_ip_block_add(adev, &acp_ip_block); #endif break; case CHIP_STONEY: - amdgpu_ip_block_add(adev, &vi_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v8_0_ip_block); - amdgpu_ip_block_add(adev, &cz_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vi_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &cz_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v11_0_ip_block); - amdgpu_ip_block_add(adev, &gfx_v8_1_ip_block); - amdgpu_ip_block_add(adev, &sdma_v3_0_ip_block); - amdgpu_ip_block_add(adev, &uvd_v6_2_ip_block); - amdgpu_ip_block_add(adev, &vce_v3_4_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_1_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v6_2_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block); #if defined(CONFIG_DRM_AMD_ACP) - amdgpu_ip_block_add(adev, &acp_ip_block); + amdgpu_device_ip_block_add(adev, &acp_ip_block); #endif break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index dbf3703cbd1b837cc1734ed7c0e7db8e16217ff6..19ddd2312e00d4c8f0178d4fe6130d8b80fb09b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -27,6 +27,8 @@ #define SDMA1_REGISTER_OFFSET 0x200 /* not a register */ #define SDMA_MAX_INSTANCE 2 +#define KFD_VI_SDMA_QUEUE_OFFSET 0x80 /* not a register */ + /* crtc instance offsets */ #define CRTC0_REGISTER_OFFSET (0x1b9c - 0x1b9c) #define CRTC1_REGISTER_OFFSET (0x1d9c - 0x1b9c) diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 7bb0bc0ca3d6aaa9ff330d41c896089ce2121011..a317e76ffb5e4aae53ba208ed63c290d7b093dbc 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -1,4 +1,24 @@ -# SPDX-License-Identifier: GPL-2.0 +# +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# # # Makefile for Heterogenous System Architecture support for AMD GPU devices # @@ -15,6 +35,8 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o \ kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ - kfd_dbgdev.o kfd_dbgmgr.o + kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o + +amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm new file mode 100644 index 0000000000000000000000000000000000000000..997a383dcb8b775c6ac27250d214da6b4129b304 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -0,0 +1,1384 @@ +/* + * Copyright 2015-2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#if 0 +HW (VI) source code for CWSR trap handler +#Version 18 + multiple trap handler + +// this performance-optimal version was originally from Seven Xu at SRDC + +// Revison #18 --... +/* Rev History +** #1. Branch from gc dv. //gfxip/gfx8/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) +** #4. SR Memory Layout: +** 1. VGPR-SGPR-HWREG-{LDS} +** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern.. +** #5. Update: 1. Accurate g8sr_ts_save_d timestamp +** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation) +** #7. Update: 1. don't barrier if noLDS +** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version +** 2. Fix SQ issue by s_sleep 2 +** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last +** 2. optimize s_buffer save by burst 16sgprs... +** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs. +** #11. Update 1. Add 2 more timestamp for debug version +** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance +** #13. Integ 1. Always use MUBUF for PV trap shader... +** #14. Update 1. s_buffer_store soft clause... +** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot. +** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree +** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part] +** 2. PERF - Save LDS before save VGPR to cover LDS save long latency... +** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32 +** 2. FUNC - Handle non-CWSR traps +*/ + +var G8SR_WDMEM_HWREG_OFFSET = 0 +var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes + +// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore. + +var G8SR_DEBUG_TIMESTAMP = 0 +var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset +var s_g8sr_ts_save_s = s[34:35] // save start +var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi +var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ +var s_g8sr_ts_save_d = s[40:41] // save end +var s_g8sr_ts_restore_s = s[42:43] // restore start +var s_g8sr_ts_restore_d = s[44:45] // restore end + +var G8SR_VGPR_SR_IN_DWX4 = 0 +var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes +var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 + + +/*************************************************************************/ +/* control on how to run the shader */ +/*************************************************************************/ +//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run) +var EMU_RUN_HACK = 0 +var EMU_RUN_HACK_RESTORE_NORMAL = 0 +var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 +var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 +var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK +var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK +var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK +var SAVE_LDS = 1 +var WG_BASE_ADDR_LO = 0x9000a000 +var WG_BASE_ADDR_HI = 0x0 +var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem +var CTX_SAVE_CONTROL = 0x0 +var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL +var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run) +var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write +var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes +var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing + +/**************************************************************************/ +/* variables */ +/**************************************************************************/ +var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 +var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 +var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 + +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits + +var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 +var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask +var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 +var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 +var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 + +var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME +var SQ_WAVE_IB_STS_RCNT_SIZE = 4 //FIXME +var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME +var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1 //FIXME +var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME + +var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 +var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 + + +/* Save */ +var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes +var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE + +var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit +var S_SAVE_SPI_INIT_ATC_SHIFT = 27 +var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype +var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 +var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG +var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used +var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME + +var s_save_spi_init_lo = exec_lo +var s_save_spi_init_hi = exec_hi + + //tba_lo and tba_hi need to be saved/restored +var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} +var s_save_pc_hi = ttmp1 +var s_save_exec_lo = ttmp2 +var s_save_exec_hi = ttmp3 +var s_save_status = ttmp4 +var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine +var s_save_xnack_mask_lo = ttmp6 +var s_save_xnack_mask_hi = ttmp7 +var s_save_buf_rsrc0 = ttmp8 +var s_save_buf_rsrc1 = ttmp9 +var s_save_buf_rsrc2 = ttmp10 +var s_save_buf_rsrc3 = ttmp11 + +var s_save_mem_offset = tma_lo +var s_save_alloc_size = s_save_trapsts //conflict +var s_save_tmp = s_save_buf_rsrc2 //shared with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time) +var s_save_m0 = tma_hi + +/* Restore */ +var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE +var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC + +var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit +var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 +var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype +var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 +var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG +var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT +var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK +var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT +var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK + +var s_restore_spi_init_lo = exec_lo +var s_restore_spi_init_hi = exec_hi + +var s_restore_mem_offset = ttmp2 +var s_restore_alloc_size = ttmp3 +var s_restore_tmp = ttmp6 //tba_lo/hi need to be restored +var s_restore_mem_offset_save = s_restore_tmp //no conflict + +var s_restore_m0 = s_restore_alloc_size //no conflict + +var s_restore_mode = ttmp7 + +var s_restore_pc_lo = ttmp0 +var s_restore_pc_hi = ttmp1 +var s_restore_exec_lo = tma_lo //no conflict +var s_restore_exec_hi = tma_hi //no conflict +var s_restore_status = ttmp4 +var s_restore_trapsts = ttmp5 +var s_restore_xnack_mask_lo = xnack_mask_lo +var s_restore_xnack_mask_hi = xnack_mask_hi +var s_restore_buf_rsrc0 = ttmp8 +var s_restore_buf_rsrc1 = ttmp9 +var s_restore_buf_rsrc2 = ttmp10 +var s_restore_buf_rsrc3 = ttmp11 + +/**************************************************************************/ +/* trap handler entry points */ +/**************************************************************************/ +/* Shader Main*/ + +shader main + asic(VI) + type(CS) + + + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore + //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC + s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. + s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE + //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE + s_branch L_SKIP_RESTORE //NOT restore, SAVE actually + else + s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save + end + +L_JUMP_TO_RESTORE: + s_branch L_RESTORE //restore + +L_SKIP_RESTORE: + + s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save + s_cbranch_scc1 L_SAVE //this is the operation for save + + // ********* Handle non-CWSR traps ******************* +if (!EMU_RUN_HACK) + /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */ + s_load_dwordx4 [ttmp8,ttmp9,ttmp10, ttmp11], [tma_lo,tma_hi], 0 + s_waitcnt lgkmcnt(0) + s_or_b32 ttmp7, ttmp8, ttmp9 + s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) + s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler + +L_NO_NEXT_TRAP: + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception + s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. + s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 + s_addc_u32 ttmp1, ttmp1, 0 +L_EXCP_CASE: + s_and_b32 ttmp1, ttmp1, 0xFFFF + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) + s_rfe_b64 [ttmp0, ttmp1] +end + // ********* End handling of non-CWSR traps ******************* + +/**************************************************************************/ +/* save routine */ +/**************************************************************************/ + +L_SAVE: + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_save_s + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? +end + + //check whether there is mem_viol + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK + s_cbranch_scc0 L_NO_PC_REWIND + + //if so, need rewind PC assuming GDS operation gets NACKed + s_mov_b32 s_save_tmp, 0 //clear mem_viol bit + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8 + s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc + +L_NO_PC_REWIND: + s_mov_b32 s_save_tmp, 0 //clear saveCtx bit + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit + + s_mov_b32 s_save_xnack_mask_lo, xnack_mask_lo //save XNACK_MASK + s_mov_b32 s_save_xnack_mask_hi, xnack_mask_hi //save XNACK must before any memory operation + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT + s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY + s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS + s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG + + s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp + + /* inform SPI the readiness and wait for SPI's go signal */ + s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI + s_mov_b32 s_save_exec_hi, exec_hi + s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_sq_save_msg + s_waitcnt lgkmcnt(0) +end + + if (EMU_RUN_HACK) + + else + s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC + end + + L_SLEEP: + s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 + + if (EMU_RUN_HACK) + + else + s_cbranch_execz L_SLEEP + end + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_spi_wrexec + s_waitcnt lgkmcnt(0) +end + + /* setup Resource Contants */ + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) + //calculate wd_addr using absolute thread id + v_readlane_b32 s_save_tmp, v9, 0 + s_lshr_b32 s_save_tmp, s_save_tmp, 6 + s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE + s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL + else + end + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE)) + s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL + else + end + + + s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo + s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited + s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK + s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK + s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE + + //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?) + s_mov_b32 s_save_m0, m0 //save M0 + + /* global mem offset */ + s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0 + + + + + /* save HW registers */ + ////////////////////////////// + + L_SAVE_HWREG: + // HWREG SR memory offset : size(VGPR)+size(SGPR) + get_vgpr_size_bytes(s_save_mem_offset) + get_sgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + + + s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0 + + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) + s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 + s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over + s_mov_b32 tba_lo, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO + s_mov_b32 tba_hi, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI + end + + write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC + write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC + write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) //STATUS + + //s_save_trapsts conflicts with s_save_alloc_size + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset) //TRAPSTS + + write_hwreg_to_mem(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO + write_hwreg_to_mem(s_save_xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI + + //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 + s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(tba_lo, s_save_buf_rsrc0, s_save_mem_offset) //TBA_LO + write_hwreg_to_mem(tba_hi, s_save_buf_rsrc0, s_save_mem_offset) //TBA_HI + + + + /* the first wave in the threadgroup */ + // save fist_wave bits in tba_hi unused bit.26 + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit + //s_or_b32 tba_hi, s_save_tmp, tba_hi // save first wave bit to tba_hi.bits[26] + s_mov_b32 s_save_exec_hi, 0x0 + s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26] + + + /* save SGPRs */ + // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save... + ////////////////////////////// + + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_save_mem_offset) + // TODO, change RSRC word to rearrange memory layout for SGPRS + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) + + if (SGPR_SAVE_USE_SQC) + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes + else + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) + end + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0 + //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0 + s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0 + s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset + s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0 + + s_mov_b32 m0, 0x0 //SGPR initial index value =0 + L_SAVE_SGPR_LOOP: + // SGPR is allocated in 16 SGPR granularity + s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] + s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] + s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] + s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] + s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] + s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] + s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0] + s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0] + + write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4 + s_add_u32 m0, m0, 16 //next sgpr index + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete? + // restore s_save_buf_rsrc0,1 + //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo + s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo + + + + + /* save first 4 VGPR, then LDS save could use */ + // each wave will alloc 4 vgprs at least... + ///////////////////////////////////////////////////////////////////////////////////// + + s_mov_b32 s_save_mem_offset, 0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // VGPR Allocated in 4-GPR granularity + +if G8SR_VGPR_SR_IN_DWX4 + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes +else + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 +end + + + + /* save LDS */ + ////////////////////////////// + + L_SAVE_LDS: + + // Change EXEC to all threads... + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE + + s_barrier //LDS is used? wait for other waves in the same TG + //s_and_b32 s_save_tmp, tba_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here + s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here + s_cbranch_scc0 L_SAVE_LDS_DONE + + // first wave do LDS save; + + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes + s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_save_mem_offset) + get_sgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() + + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + s_mov_b32 m0, 0x0 //lds_offset initial value = 0 + + +var LDS_DMA_ENABLE = 0 +var UNROLL = 0 +if UNROLL==0 && LDS_DMA_ENABLE==1 + s_mov_b32 s3, 256*2 + s_nop 0 + s_nop 0 + s_nop 0 + L_SAVE_LDS_LOOP: + //TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.??? + if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW + end + + s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes + s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete? + +elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL , has ichace miss + // store from higest LDS address to lowest + s_mov_b32 s3, 256*2 + s_sub_u32 m0, s_save_alloc_size, s3 + s_add_u32 s_save_mem_offset, s_save_mem_offset, m0 + s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128 trunks... + s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from higheset addr to lowest + s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block cost 6*4 Bytes instruction + s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //2is the below 2 inst...//s_addc and s_setpc + s_nop 0 + s_nop 0 + s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes + s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved + s_add_u32 s0, s0,s_save_alloc_size + s_addc_u32 s1, s1, 0 + s_setpc_b64 s[0:1] + + + for var i =0; i< 128; i++ + // be careful to make here a 64Byte aligned address, which could improve performance... + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW + + if i!=127 + s_sub_u32 m0, m0, s3 // use a sgpr to shrink 2DW-inst to 1DW inst to improve performance , i.e. pack more LDS_DMA inst to one Cacheline + s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3 + end + end + +else // BUFFER_STORE + v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0 + v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid + v_mul_i32_i24 v2, v3, 8 // tid*8 + v_mov_b32 v3, 256*2 + s_mov_b32 m0, 0x10000 + s_mov_b32 s0, s_save_buf_rsrc3 + s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF // disable add_tid + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000 //DFMT + +L_SAVE_LDS_LOOP_VECTOR: + ds_read_b64 v[0:1], v2 //x =LDS[a], byte address + s_waitcnt lgkmcnt(0) + buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1 +// s_waitcnt vmcnt(0) + v_add_u32 v2, vcc[0:1], v2, v3 + v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size + s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR + + // restore rsrc3 + s_mov_b32 s_save_buf_rsrc3, s0 + +end + +L_SAVE_LDS_DONE: + + + /* save VGPRs - set the Rest VGPRs */ + ////////////////////////////////////////////////////////////////////////////////////// + L_SAVE_VGPR: + // VGPR SR memory offset: 0 + // TODO rearrange the RSRC words to use swizzle for VGPR save... + + s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // VGPR Allocated in 4-GPR granularity + +if G8SR_VGPR_SR_IN_DWX4 + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + s_mov_b32 m0, 4 // skip first 4 VGPRs + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs + + s_set_gpr_idx_on m0, 0x1 // This will change M0 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0 +L_SAVE_VGPR_LOOP: + v_mov_b32 v0, v0 // v0 = v[0+m0] + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + + + buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + s_add_u32 m0, m0, 4 + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? + s_set_gpr_idx_off +L_SAVE_VGPR_LOOP_END: + + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes +else + // VGPR store using dw burst + s_mov_b32 m0, 0x4 //VGPR initial index value =0 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_END + + + s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later + + L_SAVE_VGPR_LOOP: + v_mov_b32 v0, v0 //v0 = v[0+m0] + v_mov_b32 v1, v1 //v0 = v[0+m0] + v_mov_b32 v2, v2 //v0 = v[0+m0] + v_mov_b32 v3, v3 //v0 = v[0+m0] + + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 + end + + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? + s_set_gpr_idx_off +end + +L_SAVE_VGPR_END: + + + + + + + /* S_PGM_END_SAVED */ //FIXME graphics ONLY + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 + s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over + s_rfe_b64 s_save_pc_lo //Return to the main shader program + else + end + +// Save Done timestamp +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_save_d + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_save_mem_offset) + s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? + // Need reset rsrc2?? + s_mov_b32 m0, s_save_mem_offset + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1 +end + + + s_branch L_END_PGM + + + +/**************************************************************************/ +/* restore routine */ +/**************************************************************************/ + +L_RESTORE: + /* Setup Resource Contants */ + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) + //calculate wd_addr using absolute thread id + v_readlane_b32 s_restore_tmp, v9, 0 + s_lshr_b32 s_restore_tmp, s_restore_tmp, 6 + s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE + s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL + else + end + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_restore_s + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? + // tma_lo/hi are sgpr 110, 111, which will not used for 112 SGPR allocated case... + s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0] + s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //backup ts to ttmp0/1, sicne exec will be finally restored.. +end + + + + s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo + s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) + s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK + s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position + s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK + s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position + s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE + + /* global mem offset */ +// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0 + + /* the first wave in the threadgroup */ + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK + s_cbranch_scc0 L_RESTORE_VGPR + + /* restore LDS */ + ////////////////////////////// + L_RESTORE_LDS: + + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size + s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes + s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow??? + + + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + s_mov_b32 m0, 0x0 //lds_offset initial value = 0 + + L_RESTORE_LDS_LOOP: + if (SAVE_LDS) + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW + end + s_add_u32 m0, m0, 256*2 // 128 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW + s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete? + + + /* restore VGPRs */ + ////////////////////////////// + L_RESTORE_VGPR: + // VGPR SR memory offset : 0 + s_mov_b32 s_restore_mem_offset, 0x0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + +if G8SR_VGPR_SR_IN_DWX4 + get_vgpr_size_bytes(s_restore_mem_offset) + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + s_mov_b32 m0, s_restore_alloc_size + s_set_gpr_idx_on m0, 0x8 // Note.. This will change m0 + +L_RESTORE_VGPR_LOOP: + buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + s_waitcnt vmcnt(0) + s_sub_u32 m0, m0, 4 + v_mov_b32 v0, v0 // v[0+m0] = v0 + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_cmp_eq_u32 m0, 0x8000 + s_cbranch_scc0 L_RESTORE_VGPR_LOOP + s_set_gpr_idx_off + + s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes + +else + // VGPR load using dw burst + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_mov_b32 m0, 4 //VGPR initial index value = 1 + s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later + + L_RESTORE_VGPR_LOOP: + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3 + end + s_waitcnt vmcnt(0) //ensure data ready + v_mov_b32 v0, v0 //v[0+m0] = v0 + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? + s_set_gpr_idx_off + /* VGPR restore on v0 */ + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 + end + +end + + /* restore SGPRs */ + ////////////////////////////// + + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4 // restore SGPR from S[n] to S[0], by 16 sgprs group + // TODO, change RSRC word to rearrange memory layout for SGPRS + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) + + if (SGPR_SAVE_USE_SQC) + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes + else + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) + end + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + /* If 112 SGPRs ar allocated, 4 sgprs are not used TBA(108,109),TMA(110,111), + However, we are safe to restore these 4 SGPRs anyway, since TBA,TMA will later be restored by HWREG + */ + s_mov_b32 m0, s_restore_alloc_size + + L_RESTORE_SGPR_LOOP: + read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) //PV: further performance improvement can be made + s_waitcnt lgkmcnt(0) //ensure data ready + + s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0] + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + s_movreld_b64 s4, s4 + s_movreld_b64 s6, s6 + s_movreld_b64 s8, s8 + s_movreld_b64 s10, s10 + s_movreld_b64 s12, s12 + s_movreld_b64 s14, s14 + + s_cmp_eq_u32 m0, 0 //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc0 L_RESTORE_SGPR_LOOP //SGPR restore (except s0) is complete? + + /* restore HW registers */ + ////////////////////////////// + L_RESTORE_HWREG: + + +if G8SR_DEBUG_TIMESTAMP + s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo + s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi +end + + // HWREG SR memory offset : size(VGPR)+size(SGPR) + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + + + s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0 + read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC + read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //EXEC + read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset) //STATUS + read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset) //TRAPSTS + read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO + read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI + read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE + read_hwreg_from_mem(tba_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //TBA_LO + read_hwreg_from_mem(tba_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //TBA_HI + + s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS + + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS + + //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) + s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over + end + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal + s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over + end + + s_mov_b32 m0, s_restore_m0 + s_mov_b32 exec_lo, s_restore_exec_lo + s_mov_b32 exec_hi, s_restore_exec_hi + + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0 + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0 + //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore + s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode + //reuse s_restore_m0 as a temp register + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT + s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT + s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero + s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT + s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT + s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 + s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT + s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu + + s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_restore_d + s_waitcnt lgkmcnt(0) +end + +// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution + s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc + + +/**************************************************************************/ +/* the END */ +/**************************************************************************/ +L_END_PGM: + s_endpgm + +end + + +/**************************************************************************/ +/* the helper functions */ +/**************************************************************************/ + +//Only for save hwreg to mem +function write_hwreg_to_mem(s, s_rsrc, s_mem_offset) + s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on + s_mov_b32 m0, s_mem_offset + s_buffer_store_dword s, s_rsrc, m0 glc:1 + s_add_u32 s_mem_offset, s_mem_offset, 4 + s_mov_b32 m0, exec_lo +end + + +// HWREG are saved before SGPRs, so all HWREG could be use. +function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset) + + s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1 + s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1 + s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1 + s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1 + s_add_u32 s_rsrc[0], s_rsrc[0], 4*16 + s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc +end + + +function read_hwreg_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1 + s_add_u32 s_mem_offset, s_mem_offset, 4 +end + +function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1 + s_sub_u32 s_mem_offset, s_mem_offset, 4*16 +end + + + +function get_lds_size_bytes(s_lds_size_byte) + // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW + s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) // lds_size + s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in dwords = lds_size * 64 *4Bytes // granularity 64DW +end + +function get_vgpr_size_bytes(s_vgpr_size_byte) + s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible +end + +function get_sgpr_size_bytes(s_sgpr_size_byte) + s_getreg_b32 s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_sgpr_size_byte, s_sgpr_size_byte, 1 + s_lshl_b32 s_sgpr_size_byte, s_sgpr_size_byte, 6 //Number of SGPRs = (sgpr_size + 1) * 16 *4 (non-zero value) +end + +function get_hwreg_size_bytes + return 128 //HWREG size 128 bytes +end + + +#endif + +static const uint32_t cwsr_trap_gfx8_hex[] = { + 0xbf820001, 0xbf820123, + 0xb8f4f802, 0x89748674, + 0xb8f5f803, 0x8675ff75, + 0x00000400, 0xbf850011, + 0xc00a1e37, 0x00000000, + 0xbf8c007f, 0x87777978, + 0xbf840002, 0xb974f802, + 0xbe801d78, 0xb8f5f803, + 0x8675ff75, 0x000001ff, + 0xbf850002, 0x80708470, + 0x82718071, 0x8671ff71, + 0x0000ffff, 0xb974f802, + 0xbe801f70, 0xb8f5f803, + 0x8675ff75, 0x00000100, + 0xbf840006, 0xbefa0080, + 0xb97a0203, 0x8671ff71, + 0x0000ffff, 0x80f08870, + 0x82f18071, 0xbefa0080, + 0xb97a0283, 0xbef60068, + 0xbef70069, 0xb8fa1c07, + 0x8e7a9c7a, 0x87717a71, + 0xb8fa03c7, 0x8e7a9b7a, + 0x87717a71, 0xb8faf807, + 0x867aff7a, 0x00007fff, + 0xb97af807, 0xbef2007e, + 0xbef3007f, 0xbefe0180, + 0xbf900004, 0xbf8e0002, + 0xbf88fffe, 0xbef8007e, + 0x8679ff7f, 0x0000ffff, + 0x8779ff79, 0x00040000, + 0xbefa0080, 0xbefb00ff, + 0x00807fac, 0x867aff7f, + 0x08000000, 0x8f7a837a, + 0x877b7a7b, 0x867aff7f, + 0x70000000, 0x8f7a817a, + 0x877b7a7b, 0xbeef007c, + 0xbeee0080, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x806e7a6e, + 0xbefa0084, 0xbefa00ff, + 0x01000000, 0xbefe007c, + 0xbefc006e, 0xc0611bfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611c3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611c7c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611cbc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611cfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611d3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xb8f5f803, + 0xbefe007c, 0xbefc006e, + 0xc0611d7c, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xbefe007c, 0xbefc006e, + 0xc0611dbc, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xbefe007c, 0xbefc006e, + 0xc0611dfc, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xb8eff801, 0xbefe007c, + 0xbefc006e, 0xc0611bfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611b3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611b7c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbef30080, + 0x8773737a, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8f51605, 0x80758175, + 0x8e758475, 0x8e7a8275, + 0xbefa00ff, 0x01000000, + 0xbef60178, 0x80786e78, + 0x82798079, 0xbefc0080, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003c, 0x00000000, + 0xc06b013c, 0x00000010, + 0xc06b023c, 0x00000020, + 0xc06b033c, 0x00000030, + 0x8078c078, 0x82798079, + 0x807c907c, 0xbf0a757c, + 0xbf85ffeb, 0xbef80176, + 0xbeee0080, 0xbefe00c1, + 0xbeff00c1, 0xbefa00ff, + 0x01000000, 0xe0724000, + 0x6e1e0000, 0xe0724100, + 0x6e1e0100, 0xe0724200, + 0x6e1e0200, 0xe0724300, + 0x6e1e0300, 0xbefe00c1, + 0xbeff00c1, 0xb8f54306, + 0x8675c175, 0xbf84002c, + 0xbf8a0000, 0x867aff73, + 0x04000000, 0xbf840028, + 0x8e758675, 0x8e758275, + 0xbefa0075, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x806e7a6e, + 0x806eff6e, 0x00000080, + 0xbefa00ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe80007b, + 0x867bff7b, 0xff7fffff, + 0x877bff7b, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8c007f, 0xe0765000, + 0x6e1e0002, 0x32040702, + 0xd0c9006a, 0x0000eb02, + 0xbf87fff7, 0xbefb0000, + 0xbeee00ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, + 0xb8f52a05, 0x80758175, + 0x8e758275, 0x8e7a8875, + 0xbefa00ff, 0x01000000, + 0xbefc0084, 0xbf0a757c, + 0xbf840015, 0xbf11017c, + 0x8075ff75, 0x00001000, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x6e1e0000, + 0xe0724100, 0x6e1e0100, + 0xe0724200, 0x6e1e0200, + 0xe0724300, 0x6e1e0300, + 0x807c847c, 0x806eff6e, + 0x00000400, 0xbf0a757c, + 0xbf85ffef, 0xbf9c0000, + 0xbf8200ca, 0xbef8007e, + 0x8679ff7f, 0x0000ffff, + 0x8779ff79, 0x00040000, + 0xbefa0080, 0xbefb00ff, + 0x00807fac, 0x8676ff7f, + 0x08000000, 0x8f768376, + 0x877b767b, 0x8676ff7f, + 0x70000000, 0x8f768176, + 0x877b767b, 0x8676ff7f, + 0x04000000, 0xbf84001e, + 0xbefe00c1, 0xbeff00c1, + 0xb8f34306, 0x8673c173, + 0xbf840019, 0x8e738673, + 0x8e738273, 0xbefa0073, + 0xb8f22a05, 0x80728172, + 0x8e728a72, 0xb8f61605, + 0x80768176, 0x8e768676, + 0x80727672, 0x8072ff72, + 0x00000080, 0xbefa00ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x721e0000, + 0xe0510100, 0x721e0000, + 0x807cff7c, 0x00000200, + 0x8072ff72, 0x00000200, + 0xbf0a737c, 0xbf85fff6, + 0xbef20080, 0xbefe00c1, + 0xbeff00c1, 0xb8f32a05, + 0x80738173, 0x8e738273, + 0x8e7a8873, 0xbefa00ff, + 0x01000000, 0xbef60072, + 0x8072ff72, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0x8073ff73, 0x00008000, + 0xe0524000, 0x721e0000, + 0xe0524100, 0x721e0100, + 0xe0524200, 0x721e0200, + 0xe0524300, 0x721e0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8072ff72, 0x00000400, + 0xbf0a737c, 0xbf85ffee, + 0xbf9c0000, 0xe0524000, + 0x761e0000, 0xe0524100, + 0x761e0100, 0xe0524200, + 0x761e0200, 0xe0524300, + 0x761e0300, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, + 0x80f2c072, 0xb8f31605, + 0x80738173, 0x8e738473, + 0x8e7a8273, 0xbefa00ff, + 0x01000000, 0xbefc0073, + 0xc031003c, 0x00000072, + 0x80f2c072, 0xbf8c007f, + 0x80fc907c, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff1, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, + 0xbefa0084, 0xbefa00ff, + 0x01000000, 0xc0211cfc, + 0x00000072, 0x80728472, + 0xc0211c3c, 0x00000072, + 0x80728472, 0xc0211c7c, + 0x00000072, 0x80728472, + 0xc0211bbc, 0x00000072, + 0x80728472, 0xc0211bfc, + 0x00000072, 0x80728472, + 0xc0211d3c, 0x00000072, + 0x80728472, 0xc0211d7c, + 0x00000072, 0x80728472, + 0xc0211a3c, 0x00000072, + 0x80728472, 0xc0211a7c, + 0x00000072, 0x80728472, + 0xc0211dfc, 0x00000072, + 0x80728472, 0xc0211b3c, + 0x00000072, 0x80728472, + 0xc0211b7c, 0x00000072, + 0x80728472, 0xbf8c007f, + 0x8671ff71, 0x0000ffff, + 0xbefc0073, 0xbefe006e, + 0xbeff006f, 0x867375ff, + 0x000003ff, 0xb9734803, + 0x867375ff, 0xfffff800, + 0x8f738b73, 0xb973a2c3, + 0xb977f801, 0x8673ff71, + 0xf0000000, 0x8f739c73, + 0x8e739073, 0xbef60080, + 0x87767376, 0x8673ff71, + 0x08000000, 0x8f739b73, + 0x8e738f73, 0x87767376, + 0x8673ff74, 0x00800000, + 0x8f739773, 0xb976f807, + 0x86fe7e7e, 0x86ea6a6a, + 0xb974f802, 0xbf8a0000, + 0x95807370, 0xbf810000, +}; + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 505d39156acdb1daa5300cc3bdbfd353d6085299..62c3d9cd6ef18807e754fa100f3f35c63b873a4d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -117,7 +117,7 @@ static int kfd_open(struct inode *inode, struct file *filep) return -EPERM; } - process = kfd_create_process(current); + process = kfd_create_process(filep); if (IS_ERR(process)) return PTR_ERR(process); @@ -206,6 +206,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->ctx_save_restore_area_address = args->ctx_save_restore_address; q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; + q_properties->ctl_stack_size = args->ctl_stack_size; if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) q_properties->type = KFD_QUEUE_TYPE_COMPUTE; @@ -431,6 +432,38 @@ static int kfd_ioctl_set_memory_policy(struct file *filep, return err; } +static int kfd_ioctl_set_trap_handler(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_set_trap_handler_args *args = data; + struct kfd_dev *dev; + int err = 0; + struct kfd_process_device *pdd; + + dev = kfd_device_by_id(args->gpu_id); + if (dev == NULL) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = -ESRCH; + goto out; + } + + if (dev->dqm->ops.set_trap_handler(dev->dqm, + &pdd->qpd, + args->tba_addr, + args->tma_addr)) + err = -EINVAL; + +out: + mutex_unlock(&p->mutex); + + return err; +} + static int kfd_ioctl_dbg_register(struct file *filep, struct kfd_process *p, void *data) { @@ -493,7 +526,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, long status; dev = kfd_device_by_id(args->gpu_id); - if (!dev) + if (!dev || !dev->dbgmgr) return -EINVAL; if (dev->device_info->asic_family == CHIP_CARRIZO) { @@ -979,7 +1012,10 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { kfd_ioctl_set_scratch_backing_va, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, - kfd_ioctl_get_tile_config, 0) + kfd_ioctl_get_tile_config, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, + kfd_ioctl_set_trap_handler, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) @@ -1088,6 +1124,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) KFD_MMAP_EVENTS_MASK) { vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK; return kfd_event_mmap(process, vma); + } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) == + KFD_MMAP_RESERVED_MEM_MASK) { + vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK; + return kfd_reserved_mem_mmap(process, vma); } return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c new file mode 100644 index 0000000000000000000000000000000000000000..2bc2816767a7bbda4ea85fce39c83e05c9215058 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -0,0 +1,1267 @@ +/* + * Copyright 2015-2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include "kfd_crat.h" +#include "kfd_priv.h" +#include "kfd_topology.h" + +/* GPU Processor ID base for dGPUs for which VCRAT needs to be created. + * GPU processor ID are expressed with Bit[31]=1. + * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs + * used in the CRAT. + */ +static uint32_t gpu_processor_id_low = 0x80001000; + +/* Return the next available gpu_processor_id and increment it for next GPU + * @total_cu_count - Total CUs present in the GPU including ones + * masked off + */ +static inline unsigned int get_and_inc_gpu_processor_id( + unsigned int total_cu_count) +{ + int current_id = gpu_processor_id_low; + + gpu_processor_id_low += total_cu_count; + return current_id; +} + +/* Static table to describe GPU Cache information */ +struct kfd_gpu_cache_info { + uint32_t cache_size; + uint32_t cache_level; + uint32_t flags; + /* Indicates how many Compute Units share this cache + * Value = 1 indicates the cache is not shared + */ + uint32_t num_cu_shared; +}; + +static struct kfd_gpu_cache_info kaveri_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + + }, + { + /* Scalar L1 Instruction Cache (in SQC module) per bank */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + { + /* Scalar L1 Data Cache (in SQC module) per bank */ + .cache_size = 8, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 2, + }, + + /* TODO: Add L2 Cache information */ +}; + + +static struct kfd_gpu_cache_info carrizo_cache_info[] = { + { + /* TCP L1 Cache per CU */ + .cache_size = 16, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 1, + }, + { + /* Scalar L1 Instruction Cache (in SQC module) per bank */ + .cache_size = 8, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_INST_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 4, + }, + { + /* Scalar L1 Data Cache (in SQC module) per bank. */ + .cache_size = 4, + .cache_level = 1, + .flags = (CRAT_CACHE_FLAGS_ENABLED | + CRAT_CACHE_FLAGS_DATA_CACHE | + CRAT_CACHE_FLAGS_SIMD_CACHE), + .num_cu_shared = 4, + }, + + /* TODO: Add L2 Cache information */ +}; + +/* NOTE: In future if more information is added to struct kfd_gpu_cache_info + * the following ASICs may need a separate table. + */ +#define hawaii_cache_info kaveri_cache_info +#define tonga_cache_info carrizo_cache_info +#define fiji_cache_info carrizo_cache_info +#define polaris10_cache_info carrizo_cache_info +#define polaris11_cache_info carrizo_cache_info + +static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, + struct crat_subtype_computeunit *cu) +{ + dev->node_props.cpu_cores_count = cu->num_cpu_cores; + dev->node_props.cpu_core_id_base = cu->processor_id_low; + if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) + dev->node_props.capability |= HSA_CAP_ATS_PRESENT; + + pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, + cu->processor_id_low); +} + +static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, + struct crat_subtype_computeunit *cu) +{ + dev->node_props.simd_id_base = cu->processor_id_low; + dev->node_props.simd_count = cu->num_simd_cores; + dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; + dev->node_props.max_waves_per_simd = cu->max_waves_simd; + dev->node_props.wave_front_size = cu->wave_front_size; + dev->node_props.array_count = cu->array_count; + dev->node_props.cu_per_simd_array = cu->num_cu_per_array; + dev->node_props.simd_per_cu = cu->num_simd_per_cu; + dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; + if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) + dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; + pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low); +} + +/* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct + * topology device present in the device_list + */ +static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, + struct list_head *device_list) +{ + struct kfd_topology_device *dev; + + pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", + cu->proximity_domain, cu->hsa_capability); + list_for_each_entry(dev, device_list, list) { + if (cu->proximity_domain == dev->proximity_domain) { + if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) + kfd_populated_cu_info_cpu(dev, cu); + + if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) + kfd_populated_cu_info_gpu(dev, cu); + break; + } + } + + return 0; +} + +/* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct + * topology device present in the device_list + */ +static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, + struct list_head *device_list) +{ + struct kfd_mem_properties *props; + struct kfd_topology_device *dev; + + pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n", + mem->proximity_domain); + list_for_each_entry(dev, device_list, list) { + if (mem->proximity_domain == dev->proximity_domain) { + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + + /* We're on GPU node */ + if (dev->node_props.cpu_cores_count == 0) { + /* APU */ + if (mem->visibility_type == 0) + props->heap_type = + HSA_MEM_HEAP_TYPE_FB_PRIVATE; + /* dGPU */ + else + props->heap_type = mem->visibility_type; + } else + props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; + + if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) + props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; + if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) + props->flags |= HSA_MEM_FLAGS_NON_VOLATILE; + + props->size_in_bytes = + ((uint64_t)mem->length_high << 32) + + mem->length_low; + props->width = mem->width; + + dev->node_props.mem_banks_count++; + list_add_tail(&props->list, &dev->mem_props); + + break; + } + } + + return 0; +} + +/* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct + * topology device present in the device_list + */ +static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, + struct list_head *device_list) +{ + struct kfd_cache_properties *props; + struct kfd_topology_device *dev; + uint32_t id; + uint32_t total_num_of_cu; + + id = cache->processor_id_low; + + pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); + list_for_each_entry(dev, device_list, list) { + total_num_of_cu = (dev->node_props.array_count * + dev->node_props.cu_per_simd_array); + + /* Cache infomration in CRAT doesn't have proximity_domain + * information as it is associated with a CPU core or GPU + * Compute Unit. So map the cache using CPU core Id or SIMD + * (GPU) ID. + * TODO: This works because currently we can safely assume that + * Compute Units are parsed before caches are parsed. In + * future, remove this dependency + */ + if ((id >= dev->node_props.cpu_core_id_base && + id <= dev->node_props.cpu_core_id_base + + dev->node_props.cpu_cores_count) || + (id >= dev->node_props.simd_id_base && + id < dev->node_props.simd_id_base + + total_num_of_cu)) { + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + + props->processor_id_low = id; + props->cache_level = cache->cache_level; + props->cache_size = cache->cache_size; + props->cacheline_size = cache->cache_line_size; + props->cachelines_per_tag = cache->lines_per_tag; + props->cache_assoc = cache->associativity; + props->cache_latency = cache->cache_latency; + memcpy(props->sibling_map, cache->sibling_map, + sizeof(props->sibling_map)); + + if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) + props->cache_type |= HSA_CACHE_TYPE_DATA; + if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) + props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; + if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) + props->cache_type |= HSA_CACHE_TYPE_CPU; + if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) + props->cache_type |= HSA_CACHE_TYPE_HSACU; + + dev->cache_count++; + dev->node_props.caches_count++; + list_add_tail(&props->list, &dev->cache_props); + + break; + } + } + + return 0; +} + +/* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct + * topology device present in the device_list + */ +static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, + struct list_head *device_list) +{ + struct kfd_iolink_properties *props = NULL, *props2; + struct kfd_topology_device *dev, *cpu_dev; + uint32_t id_from; + uint32_t id_to; + + id_from = iolink->proximity_domain_from; + id_to = iolink->proximity_domain_to; + + pr_debug("Found IO link entry in CRAT table with id_from=%d\n", + id_from); + list_for_each_entry(dev, device_list, list) { + if (id_from == dev->proximity_domain) { + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + + props->node_from = id_from; + props->node_to = id_to; + props->ver_maj = iolink->version_major; + props->ver_min = iolink->version_minor; + props->iolink_type = iolink->io_interface_type; + + if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) + props->weight = 20; + else + props->weight = node_distance(id_from, id_to); + + props->min_latency = iolink->minimum_latency; + props->max_latency = iolink->maximum_latency; + props->min_bandwidth = iolink->minimum_bandwidth_mbs; + props->max_bandwidth = iolink->maximum_bandwidth_mbs; + props->rec_transfer_size = + iolink->recommended_transfer_size; + + dev->io_link_count++; + dev->node_props.io_links_count++; + list_add_tail(&props->list, &dev->io_link_props); + break; + } + } + + /* CPU topology is created before GPUs are detected, so CPU->GPU + * links are not built at that time. If a PCIe type is discovered, it + * means a GPU is detected and we are adding GPU->CPU to the topology. + * At this time, also add the corresponded CPU->GPU link. + */ + if (props && props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) { + cpu_dev = kfd_topology_device_by_proximity_domain(id_to); + if (!cpu_dev) + return -ENODEV; + /* same everything but the other direction */ + props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); + props2->node_from = id_to; + props2->node_to = id_from; + props2->kobj = NULL; + cpu_dev->io_link_count++; + cpu_dev->node_props.io_links_count++; + list_add_tail(&props2->list, &cpu_dev->io_link_props); + } + + return 0; +} + +/* kfd_parse_subtype - parse subtypes and attach it to correct topology device + * present in the device_list + * @sub_type_hdr - subtype section of crat_image + * @device_list - list of topology devices present in this crat_image + */ +static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, + struct list_head *device_list) +{ + struct crat_subtype_computeunit *cu; + struct crat_subtype_memory *mem; + struct crat_subtype_cache *cache; + struct crat_subtype_iolink *iolink; + int ret = 0; + + switch (sub_type_hdr->type) { + case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY: + cu = (struct crat_subtype_computeunit *)sub_type_hdr; + ret = kfd_parse_subtype_cu(cu, device_list); + break; + case CRAT_SUBTYPE_MEMORY_AFFINITY: + mem = (struct crat_subtype_memory *)sub_type_hdr; + ret = kfd_parse_subtype_mem(mem, device_list); + break; + case CRAT_SUBTYPE_CACHE_AFFINITY: + cache = (struct crat_subtype_cache *)sub_type_hdr; + ret = kfd_parse_subtype_cache(cache, device_list); + break; + case CRAT_SUBTYPE_TLB_AFFINITY: + /* + * For now, nothing to do here + */ + pr_debug("Found TLB entry in CRAT table (not processing)\n"); + break; + case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: + /* + * For now, nothing to do here + */ + pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n"); + break; + case CRAT_SUBTYPE_IOLINK_AFFINITY: + iolink = (struct crat_subtype_iolink *)sub_type_hdr; + ret = kfd_parse_subtype_iolink(iolink, device_list); + break; + default: + pr_warn("Unknown subtype %d in CRAT\n", + sub_type_hdr->type); + } + + return ret; +} + +/* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT + * create a kfd_topology_device and add in to device_list. Also parse + * CRAT subtypes and attach it to appropriate kfd_topology_device + * @crat_image - input image containing CRAT + * @device_list - [OUT] list of kfd_topology_device generated after + * parsing crat_image + * @proximity_domain - Proximity domain of the first device in the table + * + * Return - 0 if successful else -ve value + */ +int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, + uint32_t proximity_domain) +{ + struct kfd_topology_device *top_dev = NULL; + struct crat_subtype_generic *sub_type_hdr; + uint16_t node_id; + int ret = 0; + struct crat_header *crat_table = (struct crat_header *)crat_image; + uint16_t num_nodes; + uint32_t image_len; + + if (!crat_image) + return -EINVAL; + + if (!list_empty(device_list)) { + pr_warn("Error device list should be empty\n"); + return -EINVAL; + } + + num_nodes = crat_table->num_domains; + image_len = crat_table->length; + + pr_info("Parsing CRAT table with %d nodes\n", num_nodes); + + for (node_id = 0; node_id < num_nodes; node_id++) { + top_dev = kfd_create_topology_device(device_list); + if (!top_dev) + break; + top_dev->proximity_domain = proximity_domain++; + } + + if (!top_dev) { + ret = -ENOMEM; + goto err; + } + + memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); + memcpy(top_dev->oem_table_id, crat_table->oem_table_id, + CRAT_OEMTABLEID_LENGTH); + top_dev->oem_revision = crat_table->oem_revision; + + sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); + while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) < + ((char *)crat_image) + image_len) { + if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { + ret = kfd_parse_subtype(sub_type_hdr, device_list); + if (ret) + break; + } + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length); + } + +err: + if (ret) + kfd_release_topology_device_list(device_list); + + return ret; +} + +/* Helper function. See kfd_fill_gpu_cache_info for parameter description */ +static int fill_in_pcache(struct crat_subtype_cache *pcache, + struct kfd_gpu_cache_info *pcache_info, + struct kfd_cu_info *cu_info, + int mem_available, + int cu_bitmask, + int cache_type, unsigned int cu_processor_id, + int cu_block) +{ + unsigned int cu_sibling_map_mask; + int first_active_cu; + + /* First check if enough memory is available */ + if (sizeof(struct crat_subtype_cache) > mem_available) + return -ENOMEM; + + cu_sibling_map_mask = cu_bitmask; + cu_sibling_map_mask >>= cu_block; + cu_sibling_map_mask &= + ((1 << pcache_info[cache_type].num_cu_shared) - 1); + first_active_cu = ffs(cu_sibling_map_mask); + + /* CU could be inactive. In case of shared cache find the first active + * CU. and incase of non-shared cache check if the CU is inactive. If + * inactive active skip it + */ + if (first_active_cu) { + memset(pcache, 0, sizeof(struct crat_subtype_cache)); + pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; + pcache->length = sizeof(struct crat_subtype_cache); + pcache->flags = pcache_info[cache_type].flags; + pcache->processor_id_low = cu_processor_id + + (first_active_cu - 1); + pcache->cache_level = pcache_info[cache_type].cache_level; + pcache->cache_size = pcache_info[cache_type].cache_size; + + /* Sibling map is w.r.t processor_id_low, so shift out + * inactive CU + */ + cu_sibling_map_mask = + cu_sibling_map_mask >> (first_active_cu - 1); + + pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); + pcache->sibling_map[1] = + (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); + pcache->sibling_map[2] = + (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); + pcache->sibling_map[3] = + (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); + return 0; + } + return 1; +} + +/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info + * tables + * + * @kdev - [IN] GPU device + * @gpu_processor_id - [IN] GPU processor ID to which these caches + * associate + * @available_size - [IN] Amount of memory available in pcache + * @cu_info - [IN] Compute Unit info obtained from KGD + * @pcache - [OUT] memory into which cache data is to be filled in. + * @size_filled - [OUT] amount of data used up in pcache. + * @num_of_entries - [OUT] number of caches added + */ +static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, + int gpu_processor_id, + int available_size, + struct kfd_cu_info *cu_info, + struct crat_subtype_cache *pcache, + int *size_filled, + int *num_of_entries) +{ + struct kfd_gpu_cache_info *pcache_info; + int num_of_cache_types = 0; + int i, j, k; + int ct = 0; + int mem_available = available_size; + unsigned int cu_processor_id; + int ret; + + switch (kdev->device_info->asic_family) { + case CHIP_KAVERI: + pcache_info = kaveri_cache_info; + num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); + break; + case CHIP_HAWAII: + pcache_info = hawaii_cache_info; + num_of_cache_types = ARRAY_SIZE(hawaii_cache_info); + break; + case CHIP_CARRIZO: + pcache_info = carrizo_cache_info; + num_of_cache_types = ARRAY_SIZE(carrizo_cache_info); + break; + case CHIP_TONGA: + pcache_info = tonga_cache_info; + num_of_cache_types = ARRAY_SIZE(tonga_cache_info); + break; + case CHIP_FIJI: + pcache_info = fiji_cache_info; + num_of_cache_types = ARRAY_SIZE(fiji_cache_info); + break; + case CHIP_POLARIS10: + pcache_info = polaris10_cache_info; + num_of_cache_types = ARRAY_SIZE(polaris10_cache_info); + break; + case CHIP_POLARIS11: + pcache_info = polaris11_cache_info; + num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); + break; + default: + return -EINVAL; + } + + *size_filled = 0; + *num_of_entries = 0; + + /* For each type of cache listed in the kfd_gpu_cache_info table, + * go through all available Compute Units. + * The [i,j,k] loop will + * if kfd_gpu_cache_info.num_cu_shared = 1 + * will parse through all available CU + * If (kfd_gpu_cache_info.num_cu_shared != 1) + * then it will consider only one CU from + * the shared unit + */ + + for (ct = 0; ct < num_of_cache_types; ct++) { + cu_processor_id = gpu_processor_id; + for (i = 0; i < cu_info->num_shader_engines; i++) { + for (j = 0; j < cu_info->num_shader_arrays_per_engine; + j++) { + for (k = 0; k < cu_info->num_cu_per_sh; + k += pcache_info[ct].num_cu_shared) { + + ret = fill_in_pcache(pcache, + pcache_info, + cu_info, + mem_available, + cu_info->cu_bitmap[i][j], + ct, + cu_processor_id, + k); + + if (ret < 0) + break; + + if (!ret) { + pcache++; + (*num_of_entries)++; + mem_available -= + sizeof(*pcache); + (*size_filled) += + sizeof(*pcache); + } + + /* Move to next CU block */ + cu_processor_id += + pcache_info[ct].num_cu_shared; + } + } + } + } + + pr_debug("Added [%d] GPU cache entries\n", *num_of_entries); + + return 0; +} + +/* + * kfd_create_crat_image_acpi - Allocates memory for CRAT image and + * copies CRAT from ACPI (if available). + * NOTE: Call kfd_destroy_crat_image to free CRAT image memory + * + * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then + * crat_image will be NULL + * @size: [OUT] size of crat_image + * + * Return 0 if successful else return error code + */ +int kfd_create_crat_image_acpi(void **crat_image, size_t *size) +{ + struct acpi_table_header *crat_table; + acpi_status status; + void *pcrat_image; + + if (!crat_image) + return -EINVAL; + + *crat_image = NULL; + + /* Fetch the CRAT table from ACPI */ + status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); + if (status == AE_NOT_FOUND) { + pr_warn("CRAT table not found\n"); + return -ENODATA; + } else if (ACPI_FAILURE(status)) { + const char *err = acpi_format_exception(status); + + pr_err("CRAT table error: %s\n", err); + return -EINVAL; + } + + if (ignore_crat) { + pr_info("CRAT table disabled by module option\n"); + return -ENODATA; + } + + pcrat_image = kmalloc(crat_table->length, GFP_KERNEL); + if (!pcrat_image) + return -ENOMEM; + + memcpy(pcrat_image, crat_table, crat_table->length); + + *crat_image = pcrat_image; + *size = crat_table->length; + + return 0; +} + +/* Memory required to create Virtual CRAT. + * Since there is no easy way to predict the amount of memory required, the + * following amount are allocated for CPU and GPU Virtual CRAT. This is + * expected to cover all known conditions. But to be safe additional check + * is put in the code to ensure we don't overwrite. + */ +#define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE) +#define VCRAT_SIZE_FOR_GPU (3 * PAGE_SIZE) + +/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node + * + * @numa_node_id: CPU NUMA node id + * @avail_size: Available size in the memory + * @sub_type_hdr: Memory into which compute info will be filled in + * + * Return 0 if successful else return -ve value + */ +static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size, + int proximity_domain, + struct crat_subtype_computeunit *sub_type_hdr) +{ + const struct cpumask *cpumask; + + *avail_size -= sizeof(struct crat_subtype_computeunit); + if (*avail_size < 0) + return -ENOMEM; + + memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); + + /* Fill in subtype header data */ + sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; + sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); + sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; + + cpumask = cpumask_of_node(numa_node_id); + + /* Fill in CU data */ + sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT; + sub_type_hdr->proximity_domain = proximity_domain; + sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id); + if (sub_type_hdr->processor_id_low == -1) + return -EINVAL; + + sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask); + + return 0; +} + +/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node + * + * @numa_node_id: CPU NUMA node id + * @avail_size: Available size in the memory + * @sub_type_hdr: Memory into which compute info will be filled in + * + * Return 0 if successful else return -ve value + */ +static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, + int proximity_domain, + struct crat_subtype_memory *sub_type_hdr) +{ + uint64_t mem_in_bytes = 0; + pg_data_t *pgdat; + int zone_type; + + *avail_size -= sizeof(struct crat_subtype_memory); + if (*avail_size < 0) + return -ENOMEM; + + memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory)); + + /* Fill in subtype header data */ + sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; + sub_type_hdr->length = sizeof(struct crat_subtype_memory); + sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; + + /* Fill in Memory Subunit data */ + + /* Unlike si_meminfo, si_meminfo_node is not exported. So + * the following lines are duplicated from si_meminfo_node + * function + */ + pgdat = NODE_DATA(numa_node_id); + for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) + mem_in_bytes += pgdat->node_zones[zone_type].managed_pages; + mem_in_bytes <<= PAGE_SHIFT; + + sub_type_hdr->length_low = lower_32_bits(mem_in_bytes); + sub_type_hdr->length_high = upper_32_bits(mem_in_bytes); + sub_type_hdr->proximity_domain = proximity_domain; + + return 0; +} + +static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, + uint32_t *num_entries, + struct crat_subtype_iolink *sub_type_hdr) +{ + int nid; + struct cpuinfo_x86 *c = &cpu_data(0); + uint8_t link_type; + + if (c->x86_vendor == X86_VENDOR_AMD) + link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT; + else + link_type = CRAT_IOLINK_TYPE_QPI_1_1; + + *num_entries = 0; + + /* Create IO links from this node to other CPU nodes */ + for_each_online_node(nid) { + if (nid == numa_node_id) /* node itself */ + continue; + + *avail_size -= sizeof(struct crat_subtype_iolink); + if (*avail_size < 0) + return -ENOMEM; + + memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); + + /* Fill in subtype header data */ + sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; + sub_type_hdr->length = sizeof(struct crat_subtype_iolink); + sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; + + /* Fill in IO link data */ + sub_type_hdr->proximity_domain_from = numa_node_id; + sub_type_hdr->proximity_domain_to = nid; + sub_type_hdr->io_interface_type = link_type; + + (*num_entries)++; + sub_type_hdr++; + } + + return 0; +} + +/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU + * + * @pcrat_image: Fill in VCRAT for CPU + * @size: [IN] allocated size of crat_image. + * [OUT] actual size of data filled in crat_image + */ +static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) +{ + struct crat_header *crat_table = (struct crat_header *)pcrat_image; + struct acpi_table_header *acpi_table; + acpi_status status; + struct crat_subtype_generic *sub_type_hdr; + int avail_size = *size; + int numa_node_id; + uint32_t entries = 0; + int ret = 0; + + if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU) + return -EINVAL; + + /* Fill in CRAT Header. + * Modify length and total_entries as subunits are added. + */ + avail_size -= sizeof(struct crat_header); + if (avail_size < 0) + return -ENOMEM; + + memset(crat_table, 0, sizeof(struct crat_header)); + memcpy(&crat_table->signature, CRAT_SIGNATURE, + sizeof(crat_table->signature)); + crat_table->length = sizeof(struct crat_header); + + status = acpi_get_table("DSDT", 0, &acpi_table); + if (status == AE_NOT_FOUND) + pr_warn("DSDT table not found for OEM information\n"); + else { + crat_table->oem_revision = acpi_table->revision; + memcpy(crat_table->oem_id, acpi_table->oem_id, + CRAT_OEMID_LENGTH); + memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, + CRAT_OEMTABLEID_LENGTH); + } + crat_table->total_entries = 0; + crat_table->num_domains = 0; + + sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); + + for_each_online_node(numa_node_id) { + if (kfd_numa_node_to_apic_id(numa_node_id) == -1) + continue; + + /* Fill in Subtype: Compute Unit */ + ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size, + crat_table->num_domains, + (struct crat_subtype_computeunit *)sub_type_hdr); + if (ret < 0) + return ret; + crat_table->length += sub_type_hdr->length; + crat_table->total_entries++; + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length); + + /* Fill in Subtype: Memory */ + ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size, + crat_table->num_domains, + (struct crat_subtype_memory *)sub_type_hdr); + if (ret < 0) + return ret; + crat_table->length += sub_type_hdr->length; + crat_table->total_entries++; + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length); + + /* Fill in Subtype: IO Link */ + ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size, + &entries, + (struct crat_subtype_iolink *)sub_type_hdr); + if (ret < 0) + return ret; + crat_table->length += (sub_type_hdr->length * entries); + crat_table->total_entries += entries; + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length * entries); + + crat_table->num_domains++; + } + + /* TODO: Add cache Subtype for CPU. + * Currently, CPU cache information is available in function + * detect_cache_attributes(cpu) defined in the file + * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not + * exported and to get the same information the code needs to be + * duplicated. + */ + + *size = crat_table->length; + pr_info("Virtual CRAT table created for CPU\n"); + + return 0; +} + +static int kfd_fill_gpu_memory_affinity(int *avail_size, + struct kfd_dev *kdev, uint8_t type, uint64_t size, + struct crat_subtype_memory *sub_type_hdr, + uint32_t proximity_domain, + const struct kfd_local_mem_info *local_mem_info) +{ + *avail_size -= sizeof(struct crat_subtype_memory); + if (*avail_size < 0) + return -ENOMEM; + + memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory)); + sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; + sub_type_hdr->length = sizeof(struct crat_subtype_memory); + sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; + + sub_type_hdr->proximity_domain = proximity_domain; + + pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n", + type, size); + + sub_type_hdr->length_low = lower_32_bits(size); + sub_type_hdr->length_high = upper_32_bits(size); + + sub_type_hdr->width = local_mem_info->vram_width; + sub_type_hdr->visibility_type = type; + + return 0; +} + +/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU + * to its NUMA node + * @avail_size: Available size in the memory + * @kdev - [IN] GPU device + * @sub_type_hdr: Memory into which io link info will be filled in + * @proximity_domain - proximity domain of the GPU node + * + * Return 0 if successful else return -ve value + */ +static int kfd_fill_gpu_direct_io_link(int *avail_size, + struct kfd_dev *kdev, + struct crat_subtype_iolink *sub_type_hdr, + uint32_t proximity_domain) +{ + *avail_size -= sizeof(struct crat_subtype_iolink); + if (*avail_size < 0) + return -ENOMEM; + + memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); + + /* Fill in subtype header data */ + sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; + sub_type_hdr->length = sizeof(struct crat_subtype_iolink); + sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; + + /* Fill in IOLINK subtype. + * TODO: Fill-in other fields of iolink subtype + */ + sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; + sub_type_hdr->proximity_domain_from = proximity_domain; +#ifdef CONFIG_NUMA + if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) + sub_type_hdr->proximity_domain_to = 0; + else + sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node; +#else + sub_type_hdr->proximity_domain_to = 0; +#endif + return 0; +} + +/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU + * + * @pcrat_image: Fill in VCRAT for GPU + * @size: [IN] allocated size of crat_image. + * [OUT] actual size of data filled in crat_image + */ +static int kfd_create_vcrat_image_gpu(void *pcrat_image, + size_t *size, struct kfd_dev *kdev, + uint32_t proximity_domain) +{ + struct crat_header *crat_table = (struct crat_header *)pcrat_image; + struct crat_subtype_generic *sub_type_hdr; + struct crat_subtype_computeunit *cu; + struct kfd_cu_info cu_info; + struct amd_iommu_device_info iommu_info; + int avail_size = *size; + uint32_t total_num_of_cu; + int num_of_cache_entries = 0; + int cache_mem_filled = 0; + int ret = 0; + const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | + AMD_IOMMU_DEVICE_FLAG_PRI_SUP | + AMD_IOMMU_DEVICE_FLAG_PASID_SUP; + struct kfd_local_mem_info local_mem_info; + + if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) + return -EINVAL; + + /* Fill the CRAT Header. + * Modify length and total_entries as subunits are added. + */ + avail_size -= sizeof(struct crat_header); + if (avail_size < 0) + return -ENOMEM; + + memset(crat_table, 0, sizeof(struct crat_header)); + + memcpy(&crat_table->signature, CRAT_SIGNATURE, + sizeof(crat_table->signature)); + /* Change length as we add more subtypes*/ + crat_table->length = sizeof(struct crat_header); + crat_table->num_domains = 1; + crat_table->total_entries = 0; + + /* Fill in Subtype: Compute Unit + * First fill in the sub type header and then sub type data + */ + avail_size -= sizeof(struct crat_subtype_computeunit); + if (avail_size < 0) + return -ENOMEM; + + sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1); + memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); + + sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; + sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); + sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; + + /* Fill CU subtype data */ + cu = (struct crat_subtype_computeunit *)sub_type_hdr; + cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; + cu->proximity_domain = proximity_domain; + + kdev->kfd2kgd->get_cu_info(kdev->kgd, &cu_info); + cu->num_simd_per_cu = cu_info.simd_per_cu; + cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; + cu->max_waves_simd = cu_info.max_waves_per_simd; + + cu->wave_front_size = cu_info.wave_front_size; + cu->array_count = cu_info.num_shader_arrays_per_engine * + cu_info.num_shader_engines; + total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh); + cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu); + cu->num_cu_per_array = cu_info.num_cu_per_sh; + cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu; + cu->num_banks = cu_info.num_shader_engines; + cu->lds_size_in_kb = cu_info.lds_size; + + cu->hsa_capability = 0; + + /* Check if this node supports IOMMU. During parsing this flag will + * translate to HSA_CAP_ATS_PRESENT + */ + iommu_info.flags = 0; + if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) { + if ((iommu_info.flags & required_iommu_flags) == + required_iommu_flags) + cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; + } + + crat_table->length += sub_type_hdr->length; + crat_table->total_entries++; + + /* Fill in Subtype: Memory. Only on systems with large BAR (no + * private FB), report memory as public. On other systems + * report the total FB size (public+private) as a single + * private heap. + */ + kdev->kfd2kgd->get_local_mem_info(kdev->kgd, &local_mem_info); + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length); + + if (local_mem_info.local_mem_size_private == 0) + ret = kfd_fill_gpu_memory_affinity(&avail_size, + kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC, + local_mem_info.local_mem_size_public, + (struct crat_subtype_memory *)sub_type_hdr, + proximity_domain, + &local_mem_info); + else + ret = kfd_fill_gpu_memory_affinity(&avail_size, + kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE, + local_mem_info.local_mem_size_public + + local_mem_info.local_mem_size_private, + (struct crat_subtype_memory *)sub_type_hdr, + proximity_domain, + &local_mem_info); + if (ret < 0) + return ret; + + crat_table->length += sizeof(struct crat_subtype_memory); + crat_table->total_entries++; + + /* TODO: Fill in cache information. This information is NOT readily + * available in KGD + */ + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length); + ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low, + avail_size, + &cu_info, + (struct crat_subtype_cache *)sub_type_hdr, + &cache_mem_filled, + &num_of_cache_entries); + + if (ret < 0) + return ret; + + crat_table->length += cache_mem_filled; + crat_table->total_entries += num_of_cache_entries; + avail_size -= cache_mem_filled; + + /* Fill in Subtype: IO_LINKS + * Only direct links are added here which is Link from GPU to + * to its NUMA node. Indirect links are added by userspace. + */ + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + cache_mem_filled); + ret = kfd_fill_gpu_direct_io_link(&avail_size, kdev, + (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); + + if (ret < 0) + return ret; + + crat_table->length += sub_type_hdr->length; + crat_table->total_entries++; + + *size = crat_table->length; + pr_info("Virtual CRAT table created for GPU\n"); + + return ret; +} + +/* kfd_create_crat_image_virtual - Allocates memory for CRAT image and + * creates a Virtual CRAT (VCRAT) image + * + * NOTE: Call kfd_destroy_crat_image to free CRAT image memory + * + * @crat_image: VCRAT image created because ACPI does not have a + * CRAT for this device + * @size: [OUT] size of virtual crat_image + * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device + * COMPUTE_UNIT_GPU - Create VCRAT for GPU + * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU + * -- this option is not currently implemented. + * The assumption is that all AMD APUs will have CRAT + * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU + * + * Return 0 if successful else return -ve value + */ +int kfd_create_crat_image_virtual(void **crat_image, size_t *size, + int flags, struct kfd_dev *kdev, + uint32_t proximity_domain) +{ + void *pcrat_image = NULL; + int ret = 0; + + if (!crat_image) + return -EINVAL; + + *crat_image = NULL; + + /* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and + * VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover + * all the current conditions. A check is put not to overwrite beyond + * allocated size + */ + switch (flags) { + case COMPUTE_UNIT_CPU: + pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL); + if (!pcrat_image) + return -ENOMEM; + *size = VCRAT_SIZE_FOR_CPU; + ret = kfd_create_vcrat_image_cpu(pcrat_image, size); + break; + case COMPUTE_UNIT_GPU: + if (!kdev) + return -EINVAL; + pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL); + if (!pcrat_image) + return -ENOMEM; + *size = VCRAT_SIZE_FOR_GPU; + ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev, + proximity_domain); + break; + case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU): + /* TODO: */ + ret = -EINVAL; + pr_err("VCRAT not implemented for APU\n"); + break; + default: + ret = -EINVAL; + } + + if (!ret) + *crat_image = pcrat_image; + else + kfree(pcrat_image); + + return ret; +} + + +/* kfd_destroy_crat_image + * + * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..) + * + */ +void kfd_destroy_crat_image(void *crat_image) +{ + kfree(crat_image); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index a374fa3d3ee6f28ad009cc30d4217df003248caf..b5cd182b9edd22fa08beb113396714b7e6e17b4a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -44,6 +44,10 @@ #define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1) +/* Compute Unit flags */ +#define COMPUTE_UNIT_CPU (1 << 0) /* Create Virtual CRAT for CPU */ +#define COMPUTE_UNIT_GPU (1 << 1) /* Create Virtual CRAT for GPU */ + struct crat_header { uint32_t signature; uint32_t length; @@ -105,7 +109,7 @@ struct crat_subtype_computeunit { uint8_t wave_front_size; uint8_t num_banks; uint16_t micro_engine_id; - uint8_t num_arrays; + uint8_t array_count; uint8_t num_cu_per_array; uint8_t num_simd_per_cu; uint8_t max_slots_scatch_cu; @@ -127,13 +131,14 @@ struct crat_subtype_memory { uint8_t length; uint16_t reserved; uint32_t flags; - uint32_t promixity_domain; + uint32_t proximity_domain; uint32_t base_addr_low; uint32_t base_addr_high; uint32_t length_low; uint32_t length_high; uint32_t width; - uint8_t reserved2[CRAT_MEMORY_RESERVED_LENGTH]; + uint8_t visibility_type; /* for virtual (dGPU) CRAT */ + uint8_t reserved2[CRAT_MEMORY_RESERVED_LENGTH - 1]; }; /* @@ -222,9 +227,12 @@ struct crat_subtype_ccompute { /* * HSA IO Link Affinity structure and definitions */ -#define CRAT_IOLINK_FLAGS_ENABLED 0x00000001 -#define CRAT_IOLINK_FLAGS_COHERENCY 0x00000002 -#define CRAT_IOLINK_FLAGS_RESERVED 0xfffffffc +#define CRAT_IOLINK_FLAGS_ENABLED (1 << 0) +#define CRAT_IOLINK_FLAGS_NON_COHERENT (1 << 1) +#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) +#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) +#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) +#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0xffffffe0 /* * IO interface types @@ -232,10 +240,18 @@ struct crat_subtype_ccompute { #define CRAT_IOLINK_TYPE_UNDEFINED 0 #define CRAT_IOLINK_TYPE_HYPERTRANSPORT 1 #define CRAT_IOLINK_TYPE_PCIEXPRESS 2 -#define CRAT_IOLINK_TYPE_OTHER 3 +#define CRAT_IOLINK_TYPE_AMBA 3 +#define CRAT_IOLINK_TYPE_MIPI 4 +#define CRAT_IOLINK_TYPE_QPI_1_1 5 +#define CRAT_IOLINK_TYPE_RESERVED1 6 +#define CRAT_IOLINK_TYPE_RESERVED2 7 +#define CRAT_IOLINK_TYPE_RAPID_IO 8 +#define CRAT_IOLINK_TYPE_INFINIBAND 9 +#define CRAT_IOLINK_TYPE_RESERVED3 10 +#define CRAT_IOLINK_TYPE_OTHER 11 #define CRAT_IOLINK_TYPE_MAX 255 -#define CRAT_IOLINK_RESERVED_LENGTH 24 +#define CRAT_IOLINK_RESERVED_LENGTH 24 struct crat_subtype_iolink { uint8_t type; @@ -291,4 +307,14 @@ struct cdit_header { #pragma pack() +struct kfd_dev; + +int kfd_create_crat_image_acpi(void **crat_image, size_t *size); +void kfd_destroy_crat_image(void *crat_image); +int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, + uint32_t proximity_domain); +int kfd_create_crat_image_virtual(void **crat_image, size_t *size, + int flags, struct kfd_dev *kdev, + uint32_t proximity_domain); + #endif /* KFD_CRAT_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index c407f6bd99565c9e4d7c7147a65cdfa8c41417b8..afb26f205d2978717bdeec174603caa0a83d4712 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -95,7 +95,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, ib_packet->bitfields3.ib_base_hi = largep->u.high_part; ib_packet->control = (1 << 23) | (1 << 31) | - ((size_in_bytes / sizeof(uint32_t)) & 0xfffff); + ((size_in_bytes / 4) & 0xfffff); ib_packet->bitfields5.pasid = pasid; @@ -126,8 +126,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, rm_packet->header.opcode = IT_RELEASE_MEM; rm_packet->header.type = PM4_TYPE_3; - rm_packet->header.count = sizeof(struct pm4__release_mem) / - sizeof(unsigned int) - 2; + rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2; rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; rm_packet->bitfields2.event_index = @@ -652,8 +651,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; packets_vec[0].header.type = PM4_TYPE_3; packets_vec[0].bitfields2.reg_offset = - GRBM_GFX_INDEX / (sizeof(uint32_t)) - - USERCONFIG_REG_BASE; + GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; packets_vec[0].bitfields2.insert_vmid = 0; packets_vec[0].reg_data[0] = reg_gfx_index.u32All; @@ -661,8 +659,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, packets_vec[1].header.count = 1; packets_vec[1].header.opcode = IT_SET_CONFIG_REG; packets_vec[1].header.type = PM4_TYPE_3; - packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) - - AMD_CONFIG_REG_BASE; + packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE; packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; packets_vec[1].bitfields2.insert_vmid = 1; @@ -678,8 +675,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, packets_vec[2].ordinal1 = packets_vec[0].ordinal1; packets_vec[2].bitfields2.reg_offset = - GRBM_GFX_INDEX / (sizeof(uint32_t)) - - USERCONFIG_REG_BASE; + GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; packets_vec[2].bitfields2.insert_vmid = 0; packets_vec[2].reg_data[0] = reg_gfx_index.u32All; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c new file mode 100644 index 0000000000000000000000000000000000000000..4bd6ebfaf425bcc88947bf41cc6b2ad832bc84b9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c @@ -0,0 +1,75 @@ +/* + * Copyright 2016-2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include "kfd_priv.h" + +static struct dentry *debugfs_root; + +static int kfd_debugfs_open(struct inode *inode, struct file *file) +{ + int (*show)(struct seq_file *, void *) = inode->i_private; + + return single_open(file, show, NULL); +} + +static const struct file_operations kfd_debugfs_fops = { + .owner = THIS_MODULE, + .open = kfd_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void kfd_debugfs_init(void) +{ + struct dentry *ent; + + debugfs_root = debugfs_create_dir("kfd", NULL); + if (!debugfs_root || debugfs_root == ERR_PTR(-ENODEV)) { + pr_warn("Failed to create kfd debugfs dir\n"); + return; + } + + ent = debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root, + kfd_debugfs_mqds_by_process, + &kfd_debugfs_fops); + if (!ent) + pr_warn("Failed to create mqds in kfd debugfs\n"); + + ent = debugfs_create_file("hqds", S_IFREG | 0444, debugfs_root, + kfd_debugfs_hqds_by_device, + &kfd_debugfs_fops); + if (!ent) + pr_warn("Failed to create hqds in kfd debugfs\n"); + + ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root, + kfd_debugfs_rls_by_device, + &kfd_debugfs_fops); + if (!ent) + pr_warn("Failed to create rls in kfd debugfs\n"); +} + +void kfd_debugfs_fini(void) +{ + debugfs_remove_recursive(debugfs_root); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 621a3b53a0384e1ff3aa367eeb242aef21b6dbf2..a8fa33a08de301fde244f1584e8a9edeeea40775 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -27,6 +27,7 @@ #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" +#include "cwsr_trap_handler_gfx8.asm" #define MQD_SIZE_ALIGNED 768 @@ -38,7 +39,8 @@ static const struct kfd_device_info kaveri_device_info = { .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, }; static const struct kfd_device_info carrizo_device_info = { @@ -49,7 +51,8 @@ static const struct kfd_device_info carrizo_device_info = { .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, - .mqd_size_aligned = MQD_SIZE_ALIGNED + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, }; struct kfd_deviceid { @@ -212,6 +215,17 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, return AMD_IOMMU_INV_PRI_RSP_INVALID; } +static void kfd_cwsr_init(struct kfd_dev *kfd) +{ + if (cwsr_enable && kfd->device_info->supports_cwsr) { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + + kfd->cwsr_isa = cwsr_trap_gfx8_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); + kfd->cwsr_enabled = true; + } +} + bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources) { @@ -224,6 +238,17 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd - kfd->vm_info.first_vmid_kfd + 1; + /* Verify module parameters regarding mapped process number*/ + if ((hws_max_conc_proc < 0) + || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { + dev_err(kfd_device, + "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", + hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, + kfd->vm_info.vmid_num_kfd); + kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; + } else + kfd->max_proc_per_quantum = hws_max_conc_proc; + /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; @@ -286,6 +311,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto device_iommu_pasid_error; } + kfd_cwsr_init(kfd); + if (kfd_resume(kfd)) goto kfd_resume_error; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index e202921c150e37c6e816ab49ce277c54c3304e8e..b21285afa4eac19f243ba1c22b3a486e1828eeae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -149,8 +149,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, - int *allocated_vmid) + struct qcm_process_device *qpd) { int retval; @@ -170,9 +169,11 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (retval) goto out_unlock; } - *allocated_vmid = qpd->vmid; q->properties.vmid = qpd->vmid; + q->properties.tba_addr = qpd->tba_addr; + q->properties.tma_addr = qpd->tma_addr; + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) retval = create_compute_queue_nocpsch(dqm, q, qpd); else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) @@ -181,10 +182,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, retval = -EINVAL; if (retval) { - if (list_empty(&qpd->queues_list)) { + if (list_empty(&qpd->queues_list)) deallocate_vmid(dqm, qpd, q); - *allocated_vmid = 0; - } goto out_unlock; } @@ -809,16 +808,13 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, } static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, int *allocate_vmid) + struct qcm_process_device *qpd) { int retval; struct mqd_manager *mqd; retval = 0; - if (allocate_vmid) - *allocate_vmid = 0; - mutex_lock(&dqm->lock); if (dqm->total_queue_count >= max_num_of_queues_per_device) { @@ -846,6 +842,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } dqm->asic_ops.init_sdma_vm(dqm, q, qpd); + + q->properties.tba_addr = qpd->tba_addr; + q->properties.tma_addr = qpd->tma_addr; retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) @@ -1014,13 +1013,13 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, list_del(&q->list); qpd->queue_count--; - if (q->properties.is_active) + if (q->properties.is_active) { dqm->queue_count--; - - retval = execute_queues_cpsch(dqm, + retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); - if (retval == -ETIME) - qpd->reset_wavefronts = true; + if (retval == -ETIME) + qpd->reset_wavefronts = true; + } mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); @@ -1034,7 +1033,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, mutex_unlock(&dqm->lock); - return 0; + return retval; failed: failed_try_destroy_debugged_queue: @@ -1110,6 +1109,26 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, return retval; } +static int set_trap_handler(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + uint64_t tba_addr, + uint64_t tma_addr) +{ + uint64_t *tma; + + if (dqm->dev->cwsr_enabled) { + /* Jump from CWSR trap handler to user trap */ + tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); + tma[0] = tba_addr; + tma[1] = tma_addr; + } else { + qpd->tba_addr = tba_addr; + qpd->tma_addr = tma_addr; + } + + return 0; +} + static int process_termination_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -1241,6 +1260,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_cpsch; break; case KFD_SCHED_POLICY_NO_HWS: @@ -1256,6 +1276,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.initialize = initialize_nocpsch; dqm->ops.uninitialize = uninitialize; dqm->ops.set_cache_memory_policy = set_cache_memory_policy; + dqm->ops.set_trap_handler = set_trap_handler; dqm->ops.process_termination = process_termination_nocpsch; break; default: @@ -1290,3 +1311,74 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) dqm->ops.uninitialize(dqm); kfree(dqm); } + +#if defined(CONFIG_DEBUG_FS) + +static void seq_reg_dump(struct seq_file *m, + uint32_t (*dump)[2], uint32_t n_regs) +{ + uint32_t i, count; + + for (i = 0, count = 0; i < n_regs; i++) { + if (count == 0 || + dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { + seq_printf(m, "%s %08x: %08x", + i ? "\n" : "", + dump[i][0], dump[i][1]); + count = 7; + } else { + seq_printf(m, " %08x", dump[i][1]); + count--; + } + } + + seq_puts(m, "\n"); +} + +int dqm_debugfs_hqds(struct seq_file *m, void *data) +{ + struct device_queue_manager *dqm = data; + uint32_t (*dump)[2], n_regs; + int pipe, queue; + int r = 0; + + for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { + int pipe_offset = pipe * get_queues_per_pipe(dqm); + + for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { + if (!test_bit(pipe_offset + queue, + dqm->dev->shared_resources.queue_bitmap)) + continue; + + r = dqm->dev->kfd2kgd->hqd_dump( + dqm->dev->kgd, pipe, queue, &dump, &n_regs); + if (r) + break; + + seq_printf(m, " CP Pipe %d, Queue %d\n", + pipe, queue); + seq_reg_dump(m, dump, n_regs); + + kfree(dump); + } + } + + for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) { + for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) { + r = dqm->dev->kfd2kgd->hqd_sdma_dump( + dqm->dev->kgd, pipe, queue, &dump, &n_regs); + if (r) + break; + + seq_printf(m, " SDMA Engine %d, RLC %d\n", + pipe, queue); + seq_reg_dump(m, dump, n_regs); + + kfree(dump); + } + } + + return r; +} + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 5b77cb69f732d3e3c4ac4a1761da12ecdf07a983..c61b693bfa8c3a6aac0af58e5c45bff4d9a3cc42 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -84,8 +84,7 @@ struct device_process_node { struct device_queue_manager_ops { int (*create_queue)(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, - int *allocate_vmid); + struct qcm_process_device *qpd); int (*destroy_queue)(struct device_queue_manager *dqm, struct qcm_process_device *qpd, @@ -123,6 +122,11 @@ struct device_queue_manager_ops { void __user *alternate_aperture_base, uint64_t alternate_aperture_size); + int (*set_trap_handler)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + uint64_t tba_addr, + uint64_t tma_addr); + int (*process_termination)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index feb76c235b1a6ff70f3d28297744d408bb895091..ebb4da14e3df7a3fa1efeaf60551b211f7285298 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -116,8 +116,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd) pr_debug("doorbell aperture size == 0x%08lX\n", kfd->shared_resources.doorbell_aperture_size); - pr_debug("doorbell kernel address == 0x%08lX\n", - (uintptr_t)kfd->doorbell_kernel_ptr); + pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr); return 0; } @@ -194,8 +193,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, pr_debug("Get kernel queue doorbell\n" " doorbell offset == 0x%08X\n" - " kernel address == 0x%08lX\n", - *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx)); + " kernel address == %p\n", + *doorbell_off, (kfd->doorbell_kernel_ptr + inx)); return kfd->doorbell_kernel_ptr + inx; } @@ -215,7 +214,7 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value) { if (db) { writel(value, db); - pr_debug("Writing %d to doorbell address 0x%p\n", value, db); + pr_debug("Writing %d to doorbell address %p\n", value, db); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index cb92d4b72400953c453f92357b82fbd6ad4ef1fc..93aae5c1e78bf7a37a4e885a262f9de0b21981a4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -441,7 +441,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, /* * Because we are called from arbitrary context (workqueue) as opposed * to process context, kfd_process could attempt to exit while we are - * running so the lookup function returns a locked process. + * running so the lookup function increments the process ref count. */ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); @@ -493,7 +493,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, } mutex_unlock(&p->event_mutex); - mutex_unlock(&p->mutex); + kfd_unref_process(p); } static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) @@ -847,7 +847,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, /* * Because we are called from arbitrary context (workqueue) as opposed * to process context, kfd_process could attempt to exit while we are - * running so the lookup function returns a locked process. + * running so the lookup function increments the process ref count. */ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); struct mm_struct *mm; @@ -860,7 +860,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, */ mm = get_task_mm(p->lead_thread); if (!mm) { - mutex_unlock(&p->mutex); + kfd_unref_process(p); return; /* Process is exiting */ } @@ -903,7 +903,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, &memory_exception_data); mutex_unlock(&p->event_mutex); - mutex_unlock(&p->mutex); + kfd_unref_process(p); } void kfd_signal_hw_exception_event(unsigned int pasid) @@ -911,7 +911,7 @@ void kfd_signal_hw_exception_event(unsigned int pasid) /* * Because we are called from arbitrary context (workqueue) as opposed * to process context, kfd_process could attempt to exit while we are - * running so the lookup function returns a locked process. + * running so the lookup function increments the process ref count. */ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); @@ -924,5 +924,5 @@ void kfd_signal_hw_exception_event(unsigned int pasid) lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL); mutex_unlock(&p->event_mutex); - mutex_unlock(&p->mutex); + kfd_unref_process(p); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index c59384bbbc5fc6befa302f99d10b6fdc0b30af94..7377513050e663901071486492ce9a1881a65353 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -300,9 +300,14 @@ int kfd_init_apertures(struct kfd_process *process) struct kfd_process_device *pdd; /*Iterating over all devices*/ - while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && + while (kfd_topology_enum_kfd_devices(id, &dev) == 0 && id < NUM_OF_SUPPORTED_GPUS) { + if (!dev) { + id++; /* Skip non GPU devices */ + continue; + } + pdd = kfd_create_process_device_data(dev, process); if (!pdd) { pr_err("Failed to create process device data\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 8b0c0645d7c05ed8c95b9ef59556f7c8645da5bd..5dc6567d4a138b9de087c8d543e72e90a879b40e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -218,7 +218,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, rptr = *kq->rptr_kernel; wptr = *kq->wptr_kernel; queue_address = (unsigned int *)kq->pq_kernel_addr; - queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t); + queue_size_dwords = kq->queue->properties.queue_size / 4; pr_debug("rptr: %d\n", rptr); pr_debug("wptr: %d\n", wptr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index f744caeaee049587520c75524f3ee6e580b8a63e..3ac72bed4f310a803b426a2483e40161020d5e41 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -50,6 +50,15 @@ module_param(sched_policy, int, 0444); MODULE_PARM_DESC(sched_policy, "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); +int hws_max_conc_proc = 8; +module_param(hws_max_conc_proc, int, 0444); +MODULE_PARM_DESC(hws_max_conc_proc, + "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); + +int cwsr_enable = 1; +module_param(cwsr_enable, int, 0444); +MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))"); + int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT; module_param(max_num_of_queues_per_device, int, 0444); MODULE_PARM_DESC(max_num_of_queues_per_device, @@ -60,6 +69,11 @@ module_param(send_sigterm, int, 0444); MODULE_PARM_DESC(send_sigterm, "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)"); +int ignore_crat; +module_param(ignore_crat, int, 0444); +MODULE_PARM_DESC(ignore_crat, + "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); + static int amdkfd_init_completed; int kgd2kfd_init(unsigned int interface_version, @@ -114,6 +128,8 @@ static int __init kfd_module_init(void) kfd_process_create_wq(); + kfd_debugfs_init(); + amdkfd_init_completed = 1; dev_info(kfd_device, "Initialized module\n"); @@ -130,6 +146,7 @@ static void __exit kfd_module_exit(void) { amdkfd_init_completed = 0; + kfd_debugfs_fini(); kfd_process_destroy_wq(); kfd_topology_shutdown(); kfd_chardev_exit(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 1f3a6ba7eed21e624cf80ccfc068dc7f6ef73929..8972bcfbf701c269c14c3bb6804d439429b2bb08 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -85,6 +85,10 @@ struct mqd_manager { uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); +#if defined(CONFIG_DEBUG_FS) + int (*debugfs_show_mqd)(struct seq_file *m, void *data); +#endif + struct mutex mqd_mutex; struct kfd_dev *dev; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 4728fad3fd7425ca2e0ef2fbb805dc145d078df7..f8ef4a051e08282e2cb6b629a6f8d9f75162829b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -36,6 +36,11 @@ static inline struct cik_mqd *get_mqd(void *mqd) return (struct cik_mqd *)mqd; } +static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) +{ + return (struct cik_sdma_rlc_registers *)mqd; +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -149,7 +154,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, { /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); - uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); + uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, (uint32_t __user *)p->write_ptr, @@ -160,7 +165,9 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd); + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + (uint32_t __user *)p->write_ptr, + mms); } static int update_mqd(struct mqd_manager *mm, void *mqd, @@ -176,8 +183,7 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, * Calculating queue size which is log base 2 of actual queue size -1 * dwords and another -1 for ffs */ - m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - - 1 - 1; + m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); @@ -202,7 +208,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, struct cik_sdma_rlc_registers *m; m = get_sdma_mqd(mqd); - m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1) + m->sdma_rlc_rb_cntl = order_base_2(q->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | @@ -343,8 +349,7 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, * Calculating queue size which is log base 2 of actual queue * size -1 dwords */ - m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - - 1 - 1; + m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); @@ -360,15 +365,25 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, return 0; } -struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) -{ - struct cik_sdma_rlc_registers *m; +#if defined(CONFIG_DEBUG_FS) - m = (struct cik_sdma_rlc_registers *)mqd; +static int debugfs_show_mqd(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct cik_mqd), false); + return 0; +} - return m; +static int debugfs_show_mqd_sdma(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct cik_sdma_rlc_registers), false); + return 0; } +#endif + + struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_dev *dev) { @@ -392,6 +407,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; @@ -400,6 +418,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif break; case KFD_MQD_TYPE_SDMA: mqd->init_mqd = init_mqd_sdma; @@ -408,6 +429,9 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; mqd->is_occupied = is_occupied_sdma; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; +#endif break; default: kfree(mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 4ea854f9007b7a1f26045b8d57e0b5a306bd6619..971aec0637dc7fe3c2c04f1550c79505930926b8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -30,7 +30,7 @@ #include "vi_structs.h" #include "gca/gfx_8_0_sh_mask.h" #include "gca/gfx_8_0_enum.h" - +#include "oss/oss_3_0_sh_mask.h" #define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8 static inline struct vi_mqd *get_mqd(void *mqd) @@ -38,6 +38,11 @@ static inline struct vi_mqd *get_mqd(void *mqd) return (struct vi_mqd *)mqd; } +static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct vi_sdma_mqd *)mqd; +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -84,6 +89,28 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, if (q->format == KFD_QUEUE_FORMAT_AQL) m->cp_hqd_iq_rptr = 1; + if (q->tba_addr) { + m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8); + m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8); + m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8); + m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8); + m->compute_pgm_rsrc2 |= + (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); + } + + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) { + m->cp_hqd_persistent_state |= + (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); + m->cp_hqd_ctx_save_base_addr_lo = + lower_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_base_addr_hi = + upper_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size; + m->cp_hqd_cntl_stack_size = q->ctl_stack_size; + m->cp_hqd_cntl_stack_offset = q->ctl_stack_size; + m->cp_hqd_wg_state_offset = q->ctl_stack_size; + } + *mqd = m; if (gart_addr) *gart_addr = addr; @@ -98,7 +125,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, { /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); - uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); + uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, (uint32_t __user *)p->write_ptr, @@ -116,8 +143,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT | atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT | mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT; - m->cp_hqd_pq_control |= - ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; + m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); @@ -147,7 +173,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, * is safe, giving a maximum field value of 0xA. */ m->cp_hqd_eop_control |= min(0xA, - ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1); + order_base_2(q->eop_ring_buffer_size / 4) - 1); m->cp_hqd_eop_base_addr_lo = lower_32_bits(q->eop_ring_buffer_address >> 8); m->cp_hqd_eop_base_addr_hi = @@ -163,6 +189,11 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; } + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) + m->cp_hqd_ctx_save_control = + atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT | + mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT; + q->is_active = (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0); @@ -234,6 +265,117 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, return retval; } +static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + struct vi_sdma_mqd *m; + + + retval = kfd_gtt_sa_allocate(mm->dev, + sizeof(struct vi_sdma_mqd), + mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; + + memset(m, 0, sizeof(struct vi_sdma_mqd)); + + *mqd = m; + if (gart_addr != NULL) + *gart_addr = (*mqd_mem_obj)->gpu_addr; + + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); +} + +static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + (uint32_t __user *)p->write_ptr, + mms); +} + +static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct vi_sdma_mqd *m; + + m = get_sdma_mqd(mqd); + m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | + 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | + 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; + + m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8); + m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8); + m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->sdmax_rlcx_doorbell = + q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT; + + m->sdmax_rlcx_virtual_addr = q->sdma_vm_addr; + + m->sdma_engine_id = q->sdma_engine_id; + m->sdma_queue_id = q->sdma_queue_id; + + q->is_active = (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0); + + return 0; +} + +/* + * * preempt type here is ignored because there is only one way + * * to preempt sdma queue + */ +static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); +} + +static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); +} + +#if defined(CONFIG_DEBUG_FS) + +static int debugfs_show_mqd(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct vi_mqd), false); + return 0; +} + +static int debugfs_show_mqd_sdma(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct vi_sdma_mqd), false); + return 0; +} + +#endif + struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, struct kfd_dev *dev) { @@ -257,6 +399,9 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; @@ -265,8 +410,20 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif break; case KFD_MQD_TYPE_SDMA: + mqd->init_mqd = init_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_sdma; + mqd->load_mqd = load_mqd_sdma; + mqd->update_mqd = update_mqd_sdma; + mqd->destroy_mqd = destroy_mqd_sdma; + mqd->is_occupied = is_occupied_sdma; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; +#endif break; default: kfree(mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 16da8ad02d8beb32dcc7da884d4894c9b1b3ac43..0ecbd1f9b606bfffd6302e6b72ef4af60e9ea419 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -45,7 +45,7 @@ static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size) header.u32All = 0; header.opcode = opcode; - header.count = packet_size/sizeof(uint32_t) - 2; + header.count = packet_size / 4 - 2; header.type = PM4_TYPE_3; return header.u32All; @@ -55,15 +55,27 @@ static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, bool *over_subscription) { - unsigned int process_count, queue_count; + unsigned int process_count, queue_count, compute_queue_count; unsigned int map_queue_size; + unsigned int max_proc_per_quantum = 1; + struct kfd_dev *dev = pm->dqm->dev; process_count = pm->dqm->processes_count; queue_count = pm->dqm->queue_count; + compute_queue_count = queue_count - pm->dqm->sdma_queue_count; - /* check if there is over subscription*/ + /* check if there is over subscription + * Note: the arbitration between the number of VMIDs and + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). + */ *over_subscription = false; - if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) { + + if (dev->max_proc_per_quantum > 1) + max_proc_per_quantum = dev->max_proc_per_quantum; + + if ((process_count > max_proc_per_quantum) || + compute_queue_count > get_queues_num(pm->dqm)) { *over_subscription = true; pr_debug("Over subscribed runlist\n"); } @@ -116,10 +128,24 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, uint64_t ib, size_t ib_size_in_dwords, bool chain) { struct pm4_mes_runlist *packet; + int concurrent_proc_cnt = 0; + struct kfd_dev *kfd = pm->dqm->dev; if (WARN_ON(!ib)) return -EFAULT; + /* Determine the number of processes to map together to HW: + * it can not exceed the number of VMIDs available to the + * scheduler, and it is determined by the smaller of the number + * of processes in the runlist and kfd module parameter + * hws_max_conc_proc. + * Note: the arbitration between the number of VMIDs and + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). + */ + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + packet = (struct pm4_mes_runlist *)buffer; memset(buffer, 0, sizeof(struct pm4_mes_runlist)); @@ -130,6 +156,7 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, packet->bitfields4.chain = chain ? 1 : 0; packet->bitfields4.offload_polling = 0; packet->bitfields4.valid = 1; + packet->bitfields4.process_cnt = concurrent_proc_cnt; packet->ordinal2 = lower_32_bits(ib); packet->bitfields3.ib_base_hi = upper_32_bits(ib); @@ -251,6 +278,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; *rl_size_bytes = alloc_size_bytes; + pm->ib_size_bytes = alloc_size_bytes; pr_debug("Building runlist ib process count: %d queues count %d\n", pm->dqm->processes_count, pm->dqm->queue_count); @@ -564,3 +592,26 @@ void pm_release_ib(struct packet_manager *pm) } mutex_unlock(&pm->lock); } + +#if defined(CONFIG_DEBUG_FS) + +int pm_debugfs_runlist(struct seq_file *m, void *data) +{ + struct packet_manager *pm = data; + + mutex_lock(&pm->lock); + + if (!pm->allocated) { + seq_puts(m, " No active runlist\n"); + goto out; + } + + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false); + +out: + mutex_unlock(&pm->lock); + return 0; +} + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index d6a796144269dce5888c2c7737cfb52e73232b7c..15fff4420e534fbd79c1cc1844244f715d348935 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -59,7 +59,7 @@ unsigned int kfd_pasid_alloc(void) struct kfd_dev *dev = NULL; unsigned int i = 0; - while ((dev = kfd_topology_enum_kfd_devices(i)) != NULL) { + while ((kfd_topology_enum_kfd_devices(i, &dev)) == 0) { if (dev && dev->kfd2kgd) { kfd2kgd = dev->kfd2kgd; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9e4134c5b48196d0e9dfb87b2e6a0be37243be84..0bedcf9cc08c7945deaf86e6019505ed43aab6d1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -33,14 +33,17 @@ #include #include #include +#include +#include #include #include "amd_shared.h" #define KFD_SYSFS_FILE_MODE 0444 -#define KFD_MMAP_DOORBELL_MASK 0x8000000000000 -#define KFD_MMAP_EVENTS_MASK 0x4000000000000 +#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull +#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull +#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull /* * When working with cp scheduler we should assign the HIQ manually or via @@ -62,6 +65,15 @@ #define KFD_MAX_NUM_OF_PROCESSES 512 #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 +/* + * Size of the per-process TBA+TMA buffer: 2 pages + * + * The first page is the TBA used for the CWSR ISA code. The second + * page is used as TMA for daisy changing a user-mode trap handler. + */ +#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) +#define KFD_CWSR_TMA_OFFSET PAGE_SIZE + /* * Kernel module parameter to specify maximum number of supported queues per * device @@ -78,12 +90,26 @@ extern int max_num_of_queues_per_device; /* Kernel module parameter to specify the scheduling policy */ extern int sched_policy; +/* + * Kernel module parameter to specify the maximum process + * number per HW scheduler + */ +extern int hws_max_conc_proc; + +extern int cwsr_enable; + /* * Kernel module parameter to specify whether to send sigterm to HSA process on * unhandled exception */ extern int send_sigterm; +/* + * Ignore CRAT table during KFD initialization, can be used to work around + * broken CRAT tables on some AMD systems + */ +extern int ignore_crat; + /** * enum kfd_sched_policy * @@ -131,6 +157,7 @@ struct kfd_device_info { size_t ih_ring_entry_size; uint8_t num_of_watch_points; uint16_t mqd_size_aligned; + bool supports_cwsr; }; struct kfd_mem_obj { @@ -200,6 +227,14 @@ struct kfd_dev { /* Debug manager */ struct kfd_dbgmgr *dbgmgr; + + /* Maximum process number mapped to HW scheduler */ + unsigned int max_proc_per_quantum; + + /* CWSR */ + bool cwsr_enabled; + const void *cwsr_isa; + unsigned int cwsr_isa_size; }; /* KGD2KFD callbacks */ @@ -332,6 +367,9 @@ struct queue_properties { uint32_t eop_ring_buffer_size; uint64_t ctx_save_restore_area_address; uint32_t ctx_save_restore_area_size; + uint32_t ctl_stack_size; + uint64_t tba_addr; + uint64_t tma_addr; }; /** @@ -439,6 +477,11 @@ struct qcm_process_device { uint32_t num_gws; uint32_t num_oac; uint32_t sh_hidden_private_base; + + /* CWSR memory */ + void *cwsr_kaddr; + uint64_t tba_addr; + uint64_t tma_addr; }; @@ -501,6 +544,9 @@ struct kfd_process { */ void *mm; + struct kref ref; + struct work_struct release_work; + struct mutex mutex; /* @@ -563,9 +609,10 @@ struct amdkfd_ioctl_desc { void kfd_process_create_wq(void); void kfd_process_destroy_wq(void); -struct kfd_process *kfd_create_process(const struct task_struct *); +struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); +void kfd_unref_process(struct kfd_process *p); struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process *p); @@ -577,6 +624,9 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); +int kfd_reserved_mem_mmap(struct kfd_process *process, + struct vm_area_struct *vma); + /* Process device data iterator */ struct kfd_process_device *kfd_get_first_process_device_data( struct kfd_process *p); @@ -624,9 +674,12 @@ int kfd_topology_init(void); void kfd_topology_shutdown(void); int kfd_topology_add_device(struct kfd_dev *gpu); int kfd_topology_remove_device(struct kfd_dev *gpu); +struct kfd_topology_device *kfd_topology_device_by_proximity_domain( + uint32_t proximity_domain); struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); -struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); +int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); +int kfd_numa_node_to_apic_id(int numa_node_id); /* Interrupts */ int kfd_interrupt_init(struct kfd_dev *dev); @@ -643,8 +696,6 @@ int kgd2kfd_resume(struct kfd_dev *kfd); int kfd_init_apertures(struct kfd_process *process); /* Queue Context Management */ -struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd); - int init_queue(struct queue **q, const struct queue_properties *properties); void uninit_queue(struct queue *q); void print_queue_properties(struct queue_properties *q); @@ -699,6 +750,7 @@ struct packet_manager { struct mutex lock; bool allocated; struct kfd_mem_obj *ib_buffer_obj; + unsigned int ib_size_bytes; }; int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); @@ -745,4 +797,23 @@ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); +/* Debugfs */ +#if defined(CONFIG_DEBUG_FS) + +void kfd_debugfs_init(void); +void kfd_debugfs_fini(void); +int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data); +int pqm_debugfs_mqds(struct seq_file *m, void *data); +int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data); +int dqm_debugfs_hqds(struct seq_file *m, void *data); +int kfd_debugfs_rls_by_device(struct seq_file *m, void *data); +int pm_debugfs_runlist(struct seq_file *m, void *data); + +#else + +static inline void kfd_debugfs_init(void) {} +static inline void kfd_debugfs_fini(void) {} + +#endif + #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1f5ccd28bd41464932089e2916535d7152a0e942..4ff5f0fe6db83d49138a7f60bcfdce721e911d0d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -24,10 +24,12 @@ #include #include #include +#include #include #include #include #include +#include struct mm_struct; @@ -46,13 +48,12 @@ DEFINE_STATIC_SRCU(kfd_processes_srcu); static struct workqueue_struct *kfd_process_wq; -struct kfd_process_release_work { - struct work_struct kfd_work; - struct kfd_process *p; -}; - static struct kfd_process *find_process(const struct task_struct *thread); -static struct kfd_process *create_process(const struct task_struct *thread); +static void kfd_process_ref_release(struct kref *ref); +static struct kfd_process *create_process(const struct task_struct *thread, + struct file *filep); +static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep); + void kfd_process_create_wq(void) { @@ -68,9 +69,10 @@ void kfd_process_destroy_wq(void) } } -struct kfd_process *kfd_create_process(const struct task_struct *thread) +struct kfd_process *kfd_create_process(struct file *filep) { struct kfd_process *process; + struct task_struct *thread = current; if (!thread->mm) return ERR_PTR(-EINVAL); @@ -79,9 +81,6 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) if (thread->group_leader->mm != thread->mm) return ERR_PTR(-EINVAL); - /* Take mmap_sem because we call __mmu_notifier_register inside */ - down_write(&thread->mm->mmap_sem); - /* * take kfd processes mutex before starting of process creation * so there won't be a case where two threads of the same process @@ -93,14 +92,11 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) process = find_process(thread); if (process) pr_debug("Process already found\n"); - - if (!process) - process = create_process(thread); + else + process = create_process(thread, filep); mutex_unlock(&kfd_processes_mutex); - up_write(&thread->mm->mmap_sem); - return process; } @@ -144,63 +140,75 @@ static struct kfd_process *find_process(const struct task_struct *thread) return p; } -static void kfd_process_wq_release(struct work_struct *work) +void kfd_unref_process(struct kfd_process *p) +{ + kref_put(&p->ref, kfd_process_ref_release); +} + +static void kfd_process_destroy_pdds(struct kfd_process *p) { - struct kfd_process_release_work *my_work; struct kfd_process_device *pdd, *temp; - struct kfd_process *p; - my_work = (struct kfd_process_release_work *) work; + list_for_each_entry_safe(pdd, temp, &p->per_device_data, + per_device_list) { + pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n", + pdd->dev->id, p->pasid); - p = my_work->p; + list_del(&pdd->per_device_list); - pr_debug("Releasing process (pasid %d) in workqueue\n", - p->pasid); + if (pdd->qpd.cwsr_kaddr) + free_pages((unsigned long)pdd->qpd.cwsr_kaddr, + get_order(KFD_CWSR_TBA_TMA_SIZE)); - mutex_lock(&p->mutex); + kfree(pdd); + } +} - list_for_each_entry_safe(pdd, temp, &p->per_device_data, - per_device_list) { - pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n", - pdd->dev->id, p->pasid); +/* No process locking is needed in this function, because the process + * is not findable any more. We must assume that no other thread is + * using it any more, otherwise we couldn't safely free the process + * structure in the end. + */ +static void kfd_process_wq_release(struct work_struct *work) +{ + struct kfd_process *p = container_of(work, struct kfd_process, + release_work); + struct kfd_process_device *pdd; + pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid); + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { if (pdd->bound == PDD_BOUND) amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); - - list_del(&pdd->per_device_list); - kfree(pdd); } + kfd_process_destroy_pdds(p); + kfd_event_free_process(p); kfd_pasid_free(p->pasid); kfd_free_process_doorbells(p); - mutex_unlock(&p->mutex); - mutex_destroy(&p->mutex); - kfree(p); + put_task_struct(p->lead_thread); - kfree(work); + kfree(p); } -static void kfd_process_destroy_delayed(struct rcu_head *rcu) +static void kfd_process_ref_release(struct kref *ref) { - struct kfd_process_release_work *work; - struct kfd_process *p; - - p = container_of(rcu, struct kfd_process, rcu); + struct kfd_process *p = container_of(ref, struct kfd_process, ref); - mmdrop(p->mm); + INIT_WORK(&p->release_work, kfd_process_wq_release); + queue_work(kfd_process_wq, &p->release_work); +} - work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC); +static void kfd_process_destroy_delayed(struct rcu_head *rcu) +{ + struct kfd_process *p = container_of(rcu, struct kfd_process, rcu); - if (work) { - INIT_WORK((struct work_struct *) work, kfd_process_wq_release); - work->p = p; - queue_work(kfd_process_wq, (struct work_struct *) work); - } + kfd_unref_process(p); } static void kfd_process_notifier_release(struct mmu_notifier *mn, @@ -244,15 +252,12 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, kfd_process_dequeue_from_all_devices(p); pqm_uninit(&p->pqm); + /* Indicate to other users that MM is no longer valid */ + p->mm = NULL; + mutex_unlock(&p->mutex); - /* - * Because we drop mm_count inside kfd_process_destroy_delayed - * and because the mmu_notifier_unregister function also drop - * mm_count we need to take an extra count here. - */ - mmgrab(p->mm); - mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm); + mmu_notifier_unregister_no_release(&p->mmu_notifier, mm); mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed); } @@ -260,7 +265,44 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .release = kfd_process_notifier_release, }; -static struct kfd_process *create_process(const struct task_struct *thread) +static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) +{ + unsigned long offset; + struct kfd_process_device *pdd = NULL; + struct kfd_dev *dev = NULL; + struct qcm_process_device *qpd = NULL; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + dev = pdd->dev; + qpd = &pdd->qpd; + if (!dev->cwsr_enabled || qpd->cwsr_kaddr) + continue; + offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; + qpd->tba_addr = (int64_t)vm_mmap(filep, 0, + KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, + MAP_SHARED, offset); + + if (IS_ERR_VALUE(qpd->tba_addr)) { + int err = qpd->tba_addr; + + pr_err("Failure to set tba address. error %d.\n", err); + qpd->tba_addr = 0; + qpd->cwsr_kaddr = NULL; + return err; + } + + memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); + + qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; + pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", + qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); + } + + return 0; +} + +static struct kfd_process *create_process(const struct task_struct *thread, + struct file *filep) { struct kfd_process *process; int err = -ENOMEM; @@ -277,13 +319,15 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (kfd_alloc_process_doorbells(process) < 0) goto err_alloc_doorbells; + kref_init(&process->ref); + mutex_init(&process->mutex); process->mm = thread->mm; /* register notifier */ process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; - err = __mmu_notifier_register(&process->mmu_notifier, process->mm); + err = mmu_notifier_register(&process->mmu_notifier, process->mm); if (err) goto err_mmu_notifier; @@ -291,6 +335,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) (uintptr_t)process->mm); process->lead_thread = thread->group_leader; + get_task_struct(process->lead_thread); INIT_LIST_HEAD(&process->per_device_data); @@ -306,8 +351,14 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (err != 0) goto err_init_apertures; + err = kfd_process_init_cwsr(process, filep); + if (err) + goto err_init_cwsr; + return process; +err_init_cwsr: + kfd_process_destroy_pdds(process); err_init_apertures: pqm_uninit(&process->pqm); err_process_pqm_init: @@ -343,16 +394,18 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process_device *pdd = NULL; pdd = kzalloc(sizeof(*pdd), GFP_KERNEL); - if (pdd != NULL) { - pdd->dev = dev; - INIT_LIST_HEAD(&pdd->qpd.queues_list); - INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); - pdd->qpd.dqm = dev->dqm; - pdd->process = p; - pdd->bound = PDD_UNBOUND; - pdd->already_dequeued = false; - list_add(&pdd->per_device_list, &p->per_device_data); - } + if (!pdd) + return NULL; + + pdd->dev = dev; + INIT_LIST_HEAD(&pdd->qpd.queues_list); + INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); + pdd->qpd.dqm = dev->dqm; + pdd->qpd.pqm = &p->pqm; + pdd->process = p; + pdd->bound = PDD_UNBOUND; + pdd->already_dequeued = false; + list_add(&pdd->per_device_list, &p->per_device_data); return pdd; } @@ -408,7 +461,8 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev) hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { mutex_lock(&p->mutex); pdd = kfd_get_process_device_data(dev, p); - if (pdd->bound != PDD_BOUND_SUSPENDED) { + + if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) { mutex_unlock(&p->mutex); continue; } @@ -448,6 +502,11 @@ void kfd_unbind_processes_from_device(struct kfd_dev *dev) mutex_lock(&p->mutex); pdd = kfd_get_process_device_data(dev, p); + if (WARN_ON(!pdd)) { + mutex_unlock(&p->mutex); + continue; + } + if (pdd->bound == PDD_BOUND) pdd->bound = PDD_BOUND_SUSPENDED; mutex_unlock(&p->mutex); @@ -483,6 +542,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) mutex_unlock(kfd_get_dbgmgr_mutex()); + mutex_lock(&p->mutex); + pdd = kfd_get_process_device_data(dev, p); if (pdd) /* For GPU relying on IOMMU, we need to dequeue here @@ -491,6 +552,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) kfd_process_dequeue_from_device(pdd); mutex_unlock(&p->mutex); + + kfd_unref_process(p); } struct kfd_process_device *kfd_get_first_process_device_data( @@ -515,22 +578,86 @@ bool kfd_has_process_device_data(struct kfd_process *p) return !(list_empty(&p->per_device_data)); } -/* This returns with process->mutex locked. */ +/* This increments the process->ref counter. */ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) { - struct kfd_process *p; + struct kfd_process *p, *ret_p = NULL; unsigned int temp; int idx = srcu_read_lock(&kfd_processes_srcu); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { if (p->pasid == pasid) { - mutex_lock(&p->mutex); + kref_get(&p->ref); + ret_p = p; break; } } srcu_read_unlock(&kfd_processes_srcu, idx); - return p; + return ret_p; +} + +int kfd_reserved_mem_mmap(struct kfd_process *process, + struct vm_area_struct *vma) +{ + struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff); + struct kfd_process_device *pdd; + struct qcm_process_device *qpd; + + if (!dev) + return -EINVAL; + if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) { + pr_err("Incorrect CWSR mapping size.\n"); + return -EINVAL; + } + + pdd = kfd_get_process_device_data(dev, process); + if (!pdd) + return -EINVAL; + qpd = &pdd->qpd; + + qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(KFD_CWSR_TBA_TMA_SIZE)); + if (!qpd->cwsr_kaddr) { + pr_err("Error allocating per process CWSR buffer.\n"); + return -ENOMEM; + } + + vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND + | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; + /* Mapping pages to user process */ + return remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(__pa(qpd->cwsr_kaddr)), + KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot); +} + +#if defined(CONFIG_DEBUG_FS) + +int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data) +{ + struct kfd_process *p; + unsigned int temp; + int r = 0; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + seq_printf(m, "Process %d PASID %d:\n", + p->lead_thread->tgid, p->pasid); + + mutex_lock(&p->mutex); + r = pqm_debugfs_mqds(m, &p->pqm); + mutex_unlock(&p->mutex); + + if (r) + break; + } + + srcu_read_unlock(&kfd_processes_srcu, idx); + + return r; } + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index a3f1e62c60ba9d80b7d0244be3b3e3301a64557c..8763806326682c4c059359ef0b44dc9855e88ef5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -178,10 +178,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, return retval; if (list_empty(&pdd->qpd.queues_list) && - list_empty(&pdd->qpd.priv_queue_list)) { - pdd->qpd.pqm = pqm; + list_empty(&pdd->qpd.priv_queue_list)) dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); - } pqn = kzalloc(sizeof(*pqn), GFP_KERNEL); if (!pqn) { @@ -203,8 +201,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; pqn->q = q; pqn->kq = NULL; - retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, - &q->properties.vmid); + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd); pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; @@ -224,8 +221,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; pqn->q = q; pqn->kq = NULL; - retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, - &q->properties.vmid); + retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd); pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; @@ -315,6 +311,10 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (pqn->q) { dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); + if (retval) { + pr_debug("Destroy queue failed, returned %d\n", retval); + goto err_destroy_queue; + } uninit_queue(pqn->q); } @@ -326,6 +326,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) list_empty(&pdd->qpd.priv_queue_list)) dqm->ops.unregister_process(dqm, &pdd->qpd); +err_destroy_queue: return retval; } @@ -367,4 +368,67 @@ struct kernel_queue *pqm_get_kernel_queue( return NULL; } +#if defined(CONFIG_DEBUG_FS) + +int pqm_debugfs_mqds(struct seq_file *m, void *data) +{ + struct process_queue_manager *pqm = data; + struct process_queue_node *pqn; + struct queue *q; + enum KFD_MQD_TYPE mqd_type; + struct mqd_manager *mqd_manager; + int r = 0; + + list_for_each_entry(pqn, &pqm->queues, process_queue_list) { + if (pqn->q) { + q = pqn->q; + switch (q->properties.type) { + case KFD_QUEUE_TYPE_SDMA: + seq_printf(m, " SDMA queue on device %x\n", + q->device->id); + mqd_type = KFD_MQD_TYPE_SDMA; + break; + case KFD_QUEUE_TYPE_COMPUTE: + seq_printf(m, " Compute queue on device %x\n", + q->device->id); + mqd_type = KFD_MQD_TYPE_CP; + break; + default: + seq_printf(m, + " Bad user queue type %d on device %x\n", + q->properties.type, q->device->id); + continue; + } + mqd_manager = q->device->dqm->ops.get_mqd_manager( + q->device->dqm, mqd_type); + } else if (pqn->kq) { + q = pqn->kq->queue; + mqd_manager = pqn->kq->mqd; + switch (q->properties.type) { + case KFD_QUEUE_TYPE_DIQ: + seq_printf(m, " DIQ on device %x\n", + pqn->kq->dev->id); + mqd_type = KFD_MQD_TYPE_HIQ; + break; + default: + seq_printf(m, + " Bad kernel queue type %d on device %x\n", + q->properties.type, + pqn->kq->dev->id); + continue; + } + } else { + seq_printf(m, + " Weird: Queue node with neither kernel nor user queue\n"); + continue; + } + + r = mqd_manager->debugfs_show_mqd(m, q->mqd); + if (r != 0) + break; + } + + return r; +} +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 19ce59028d6bdb93844ac1582d237a11202fea73..c6a76090a725055df2bc61cccc50e34056ea8574 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -28,27 +28,32 @@ #include #include #include +#include +#include #include "kfd_priv.h" #include "kfd_crat.h" #include "kfd_topology.h" +#include "kfd_device_queue_manager.h" +/* topology_device_list - Master list of all topology devices */ static struct list_head topology_device_list; -static int topology_crat_parsed; static struct kfd_system_properties sys_props; static DECLARE_RWSEM(topology_lock); +static atomic_t topology_crat_proximity_domain; -struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) +struct kfd_topology_device *kfd_topology_device_by_proximity_domain( + uint32_t proximity_domain) { struct kfd_topology_device *top_dev; - struct kfd_dev *device = NULL; + struct kfd_topology_device *device = NULL; down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu_id == gpu_id) { - device = top_dev->gpu; + if (top_dev->proximity_domain == proximity_domain) { + device = top_dev; break; } @@ -57,7 +62,7 @@ struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) return device; } -struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) +struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) { struct kfd_topology_device *top_dev; struct kfd_dev *device = NULL; @@ -65,7 +70,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu->pdev == pdev) { + if (top_dev->gpu_id == gpu_id) { device = top_dev->gpu; break; } @@ -75,282 +80,31 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) return device; } -static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size) -{ - struct acpi_table_header *crat_table; - acpi_status status; - - if (!size) - return -EINVAL; - - /* - * Fetch the CRAT table from ACPI - */ - status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); - if (status == AE_NOT_FOUND) { - pr_warn("CRAT table not found\n"); - return -ENODATA; - } else if (ACPI_FAILURE(status)) { - const char *err = acpi_format_exception(status); - - pr_err("CRAT table error: %s\n", err); - return -EINVAL; - } - - if (*size >= crat_table->length && crat_image != NULL) - memcpy(crat_image, crat_table, crat_table->length); - - *size = crat_table->length; - - return 0; -} - -static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, - struct crat_subtype_computeunit *cu) -{ - dev->node_props.cpu_cores_count = cu->num_cpu_cores; - dev->node_props.cpu_core_id_base = cu->processor_id_low; - if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) - dev->node_props.capability |= HSA_CAP_ATS_PRESENT; - - pr_info("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, - cu->processor_id_low); -} - -static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, - struct crat_subtype_computeunit *cu) -{ - dev->node_props.simd_id_base = cu->processor_id_low; - dev->node_props.simd_count = cu->num_simd_cores; - dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; - dev->node_props.max_waves_per_simd = cu->max_waves_simd; - dev->node_props.wave_front_size = cu->wave_front_size; - dev->node_props.mem_banks_count = cu->num_banks; - dev->node_props.array_count = cu->num_arrays; - dev->node_props.cu_per_simd_array = cu->num_cu_per_array; - dev->node_props.simd_per_cu = cu->num_simd_per_cu; - dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; - if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) - dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; - pr_info("CU GPU: simds=%d id_base=%d\n", cu->num_simd_cores, - cu->processor_id_low); -} - -/* kfd_parse_subtype_cu is called when the topology mutex is already acquired */ -static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu) -{ - struct kfd_topology_device *dev; - int i = 0; - - pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", - cu->proximity_domain, cu->hsa_capability); - list_for_each_entry(dev, &topology_device_list, list) { - if (cu->proximity_domain == i) { - if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) - kfd_populated_cu_info_cpu(dev, cu); - - if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) - kfd_populated_cu_info_gpu(dev, cu); - break; - } - i++; - } - - return 0; -} - -/* - * kfd_parse_subtype_mem is called when the topology mutex is - * already acquired - */ -static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem) -{ - struct kfd_mem_properties *props; - struct kfd_topology_device *dev; - int i = 0; - - pr_info("Found memory entry in CRAT table with proximity_domain=%d\n", - mem->promixity_domain); - list_for_each_entry(dev, &topology_device_list, list) { - if (mem->promixity_domain == i) { - props = kfd_alloc_struct(props); - if (props == NULL) - return -ENOMEM; - - if (dev->node_props.cpu_cores_count == 0) - props->heap_type = HSA_MEM_HEAP_TYPE_FB_PRIVATE; - else - props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; - - if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) - props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; - if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) - props->flags |= HSA_MEM_FLAGS_NON_VOLATILE; - - props->size_in_bytes = - ((uint64_t)mem->length_high << 32) + - mem->length_low; - props->width = mem->width; - - dev->mem_bank_count++; - list_add_tail(&props->list, &dev->mem_props); - - break; - } - i++; - } - - return 0; -} - -/* - * kfd_parse_subtype_cache is called when the topology mutex - * is already acquired - */ -static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache) -{ - struct kfd_cache_properties *props; - struct kfd_topology_device *dev; - uint32_t id; - - id = cache->processor_id_low; - - pr_info("Found cache entry in CRAT table with processor_id=%d\n", id); - list_for_each_entry(dev, &topology_device_list, list) - if (id == dev->node_props.cpu_core_id_base || - id == dev->node_props.simd_id_base) { - props = kfd_alloc_struct(props); - if (props == NULL) - return -ENOMEM; - - props->processor_id_low = id; - props->cache_level = cache->cache_level; - props->cache_size = cache->cache_size; - props->cacheline_size = cache->cache_line_size; - props->cachelines_per_tag = cache->lines_per_tag; - props->cache_assoc = cache->associativity; - props->cache_latency = cache->cache_latency; - - if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) - props->cache_type |= HSA_CACHE_TYPE_DATA; - if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) - props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; - if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) - props->cache_type |= HSA_CACHE_TYPE_CPU; - if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) - props->cache_type |= HSA_CACHE_TYPE_HSACU; - - dev->cache_count++; - dev->node_props.caches_count++; - list_add_tail(&props->list, &dev->cache_props); - - break; - } - - return 0; -} - -/* - * kfd_parse_subtype_iolink is called when the topology mutex - * is already acquired - */ -static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink) +struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) { - struct kfd_iolink_properties *props; - struct kfd_topology_device *dev; - uint32_t i = 0; - uint32_t id_from; - uint32_t id_to; - - id_from = iolink->proximity_domain_from; - id_to = iolink->proximity_domain_to; + struct kfd_topology_device *top_dev; + struct kfd_dev *device = NULL; - pr_info("Found IO link entry in CRAT table with id_from=%d\n", id_from); - list_for_each_entry(dev, &topology_device_list, list) { - if (id_from == i) { - props = kfd_alloc_struct(props); - if (props == NULL) - return -ENOMEM; - - props->node_from = id_from; - props->node_to = id_to; - props->ver_maj = iolink->version_major; - props->ver_min = iolink->version_minor; - - /* - * weight factor (derived from CDIR), currently always 1 - */ - props->weight = 1; - - props->min_latency = iolink->minimum_latency; - props->max_latency = iolink->maximum_latency; - props->min_bandwidth = iolink->minimum_bandwidth_mbs; - props->max_bandwidth = iolink->maximum_bandwidth_mbs; - props->rec_transfer_size = - iolink->recommended_transfer_size; - - dev->io_link_count++; - dev->node_props.io_links_count++; - list_add_tail(&props->list, &dev->io_link_props); + down_read(&topology_lock); + list_for_each_entry(top_dev, &topology_device_list, list) + if (top_dev->gpu->pdev == pdev) { + device = top_dev->gpu; break; } - i++; - } - return 0; -} - -static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr) -{ - struct crat_subtype_computeunit *cu; - struct crat_subtype_memory *mem; - struct crat_subtype_cache *cache; - struct crat_subtype_iolink *iolink; - int ret = 0; - - switch (sub_type_hdr->type) { - case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY: - cu = (struct crat_subtype_computeunit *)sub_type_hdr; - ret = kfd_parse_subtype_cu(cu); - break; - case CRAT_SUBTYPE_MEMORY_AFFINITY: - mem = (struct crat_subtype_memory *)sub_type_hdr; - ret = kfd_parse_subtype_mem(mem); - break; - case CRAT_SUBTYPE_CACHE_AFFINITY: - cache = (struct crat_subtype_cache *)sub_type_hdr; - ret = kfd_parse_subtype_cache(cache); - break; - case CRAT_SUBTYPE_TLB_AFFINITY: - /* - * For now, nothing to do here - */ - pr_info("Found TLB entry in CRAT table (not processing)\n"); - break; - case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: - /* - * For now, nothing to do here - */ - pr_info("Found CCOMPUTE entry in CRAT table (not processing)\n"); - break; - case CRAT_SUBTYPE_IOLINK_AFFINITY: - iolink = (struct crat_subtype_iolink *)sub_type_hdr; - ret = kfd_parse_subtype_iolink(iolink); - break; - default: - pr_warn("Unknown subtype (%d) in CRAT\n", - sub_type_hdr->type); - } + up_read(&topology_lock); - return ret; + return device; } +/* Called with write topology_lock acquired */ static void kfd_release_topology_device(struct kfd_topology_device *dev) { struct kfd_mem_properties *mem; struct kfd_cache_properties *cache; struct kfd_iolink_properties *iolink; + struct kfd_perf_properties *perf; list_del(&dev->list); @@ -375,25 +129,35 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) kfree(iolink); } - kfree(dev); + while (dev->perf_props.next != &dev->perf_props) { + perf = container_of(dev->perf_props.next, + struct kfd_perf_properties, list); + list_del(&perf->list); + kfree(perf); + } - sys_props.num_devices--; + kfree(dev); } -static void kfd_release_live_view(void) +void kfd_release_topology_device_list(struct list_head *device_list) { struct kfd_topology_device *dev; - while (topology_device_list.next != &topology_device_list) { - dev = container_of(topology_device_list.next, - struct kfd_topology_device, list); + while (!list_empty(device_list)) { + dev = list_first_entry(device_list, + struct kfd_topology_device, list); kfd_release_topology_device(dev); + } } +static void kfd_release_live_view(void) +{ + kfd_release_topology_device_list(&topology_device_list); memset(&sys_props, 0, sizeof(sys_props)); } -static struct kfd_topology_device *kfd_create_topology_device(void) +struct kfd_topology_device *kfd_create_topology_device( + struct list_head *device_list) { struct kfd_topology_device *dev; @@ -406,65 +170,13 @@ static struct kfd_topology_device *kfd_create_topology_device(void) INIT_LIST_HEAD(&dev->mem_props); INIT_LIST_HEAD(&dev->cache_props); INIT_LIST_HEAD(&dev->io_link_props); + INIT_LIST_HEAD(&dev->perf_props); - list_add_tail(&dev->list, &topology_device_list); - sys_props.num_devices++; + list_add_tail(&dev->list, device_list); return dev; } -static int kfd_parse_crat_table(void *crat_image) -{ - struct kfd_topology_device *top_dev; - struct crat_subtype_generic *sub_type_hdr; - uint16_t node_id; - int ret; - struct crat_header *crat_table = (struct crat_header *)crat_image; - uint16_t num_nodes; - uint32_t image_len; - - if (!crat_image) - return -EINVAL; - - num_nodes = crat_table->num_domains; - image_len = crat_table->length; - - pr_info("Parsing CRAT table with %d nodes\n", num_nodes); - - for (node_id = 0; node_id < num_nodes; node_id++) { - top_dev = kfd_create_topology_device(); - if (!top_dev) { - kfd_release_live_view(); - return -ENOMEM; - } - } - - sys_props.platform_id = - (*((uint64_t *)crat_table->oem_id)) & CRAT_OEMID_64BIT_MASK; - sys_props.platform_oem = *((uint64_t *)crat_table->oem_table_id); - sys_props.platform_rev = crat_table->revision; - - sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); - while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) < - ((char *)crat_image) + image_len) { - if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { - ret = kfd_parse_subtype(sub_type_hdr); - if (ret != 0) { - kfd_release_live_view(); - return ret; - } - } - - sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + - sub_type_hdr->length); - } - - sys_props.generation_count++; - topology_crat_parsed = 1; - - return 0; -} - #define sysfs_show_gen_prop(buffer, fmt, ...) \ snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__) @@ -501,11 +213,17 @@ static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr, return ret; } +static void kfd_topology_kobj_release(struct kobject *kobj) +{ + kfree(kobj); +} + static const struct sysfs_ops sysprops_ops = { .show = sysprops_show, }; static struct kobj_type sysprops_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &sysprops_ops, }; @@ -541,6 +259,7 @@ static const struct sysfs_ops iolink_ops = { }; static struct kobj_type iolink_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &iolink_ops, }; @@ -568,6 +287,7 @@ static const struct sysfs_ops mem_ops = { }; static struct kobj_type mem_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &mem_ops, }; @@ -575,7 +295,7 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, char *buffer) { ssize_t ret; - uint32_t i; + uint32_t i, j; struct kfd_cache_properties *cache; /* Making sure that the buffer is an empty string */ @@ -593,12 +313,18 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency); sysfs_show_32bit_prop(buffer, "type", cache->cache_type); snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer); - for (i = 0; i < KFD_TOPOLOGY_CPU_SIBLINGS; i++) - ret = snprintf(buffer, PAGE_SIZE, "%s%d%s", - buffer, cache->sibling_map[i], - (i == KFD_TOPOLOGY_CPU_SIBLINGS-1) ? - "\n" : ","); - + for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++) + for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) { + /* Check each bit */ + if (cache->sibling_map[i] & (1 << j)) + ret = snprintf(buffer, PAGE_SIZE, + "%s%d%s", buffer, 1, ","); + else + ret = snprintf(buffer, PAGE_SIZE, + "%s%d%s", buffer, 0, ","); + } + /* Replace the last "," with end of line */ + *(buffer + strlen(buffer) - 1) = 0xA; return ret; } @@ -607,9 +333,43 @@ static const struct sysfs_ops cache_ops = { }; static struct kobj_type cache_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &cache_ops, }; +/****** Sysfs of Performance Counters ******/ + +struct kfd_perf_attr { + struct kobj_attribute attr; + uint32_t data; +}; + +static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs, + char *buf) +{ + struct kfd_perf_attr *attr; + + buf[0] = 0; + attr = container_of(attrs, struct kfd_perf_attr, attr); + if (!attr->data) /* invalid data for PMC */ + return 0; + else + return sysfs_show_32bit_val(buf, attr->data); +} + +#define KFD_PERF_DESC(_name, _data) \ +{ \ + .attr = __ATTR(_name, 0444, perf_show, NULL), \ + .data = _data, \ +} + +static struct kfd_perf_attr perf_attr_iommu[] = { + KFD_PERF_DESC(max_concurrent, 0), + KFD_PERF_DESC(num_counters, 0), + KFD_PERF_DESC(counter_ids, 0), +}; +/****************************************/ + static ssize_t node_show(struct kobject *kobj, struct attribute *attr, char *buffer) { @@ -646,18 +406,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.cpu_cores_count); sysfs_show_32bit_prop(buffer, "simd_count", dev->node_props.simd_count); - - if (dev->mem_bank_count < dev->node_props.mem_banks_count) { - pr_info_once("mem_banks_count truncated from %d to %d\n", - dev->node_props.mem_banks_count, - dev->mem_bank_count); - sysfs_show_32bit_prop(buffer, "mem_banks_count", - dev->mem_bank_count); - } else { - sysfs_show_32bit_prop(buffer, "mem_banks_count", - dev->node_props.mem_banks_count); - } - + sysfs_show_32bit_prop(buffer, "mem_banks_count", + dev->node_props.mem_banks_count); sysfs_show_32bit_prop(buffer, "caches_count", dev->node_props.caches_count); sysfs_show_32bit_prop(buffer, "io_links_count", @@ -705,9 +455,12 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, HSA_CAP_WATCH_POINTS_TOTALBITS_MASK); } + if (dev->gpu->device_info->asic_family == CHIP_TONGA) + dev->node_props.capability |= + HSA_CAP_AQL_QUEUE_DOUBLE_MAP; + sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute", - dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz( - dev->gpu->kgd)); + dev->node_props.max_engine_clk_fcompute); sysfs_show_64bit_prop(buffer, "local_mem_size", (unsigned long long int) 0); @@ -729,6 +482,7 @@ static const struct sysfs_ops node_ops = { }; static struct kobj_type node_type = { + .release = kfd_topology_kobj_release, .sysfs_ops = &node_ops, }; @@ -744,6 +498,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) struct kfd_iolink_properties *iolink; struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; + struct kfd_perf_properties *perf; if (dev->kobj_iolink) { list_for_each_entry(iolink, &dev->io_link_props, list) @@ -780,6 +535,16 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) dev->kobj_mem = NULL; } + if (dev->kobj_perf) { + list_for_each_entry(perf, &dev->perf_props, list) { + kfree(perf->attr_group); + perf->attr_group = NULL; + } + kobject_del(dev->kobj_perf); + kobject_put(dev->kobj_perf); + dev->kobj_perf = NULL; + } + if (dev->kobj_node) { sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); sysfs_remove_file(dev->kobj_node, &dev->attr_name); @@ -796,8 +561,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, struct kfd_iolink_properties *iolink; struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; + struct kfd_perf_properties *perf; int ret; - uint32_t i; + uint32_t i, num_attrs; + struct attribute **attrs; if (WARN_ON(dev->kobj_node)) return -EEXIST; @@ -826,6 +593,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, if (!dev->kobj_iolink) return -ENOMEM; + dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node); + if (!dev->kobj_perf) + return -ENOMEM; + /* * Creating sysfs files for node properties */ @@ -903,11 +674,38 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, if (ret < 0) return ret; i++; -} + } + + /* All hardware blocks have the same number of attributes. */ + num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr); + list_for_each_entry(perf, &dev->perf_props, list) { + perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) + * num_attrs + sizeof(struct attribute_group), + GFP_KERNEL); + if (!perf->attr_group) + return -ENOMEM; + + attrs = (struct attribute **)(perf->attr_group + 1); + if (!strcmp(perf->block_name, "iommu")) { + /* Information of IOMMU's num_counters and counter_ids is shown + * under /sys/bus/event_source/devices/amd_iommu. We don't + * duplicate here. + */ + perf_attr_iommu[0].data = perf->max_concurrent; + for (i = 0; i < num_attrs; i++) + attrs[i] = &perf_attr_iommu[i].attr.attr; + } + perf->attr_group->name = perf->block_name; + perf->attr_group->attrs = attrs; + ret = sysfs_create_group(dev->kobj_perf, perf->attr_group); + if (ret < 0) + return ret; + } return 0; } +/* Called with write topology lock acquired */ static int kfd_build_sysfs_node_tree(void) { struct kfd_topology_device *dev; @@ -924,6 +722,7 @@ static int kfd_build_sysfs_node_tree(void) return 0; } +/* Called with write topology lock acquired */ static void kfd_remove_sysfs_node_tree(void) { struct kfd_topology_device *dev; @@ -995,75 +794,246 @@ static void kfd_topology_release_sysfs(void) } } +/* Called with write topology_lock acquired */ +static void kfd_topology_update_device_list(struct list_head *temp_list, + struct list_head *master_list) +{ + while (!list_empty(temp_list)) { + list_move_tail(temp_list->next, master_list); + sys_props.num_devices++; + } +} + +static void kfd_debug_print_topology(void) +{ + struct kfd_topology_device *dev; + + down_read(&topology_lock); + + dev = list_last_entry(&topology_device_list, + struct kfd_topology_device, list); + if (dev) { + if (dev->node_props.cpu_cores_count && + dev->node_props.simd_count) { + pr_info("Topology: Add APU node [0x%0x:0x%0x]\n", + dev->node_props.device_id, + dev->node_props.vendor_id); + } else if (dev->node_props.cpu_cores_count) + pr_info("Topology: Add CPU node\n"); + else if (dev->node_props.simd_count) + pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n", + dev->node_props.device_id, + dev->node_props.vendor_id); + } + up_read(&topology_lock); +} + +/* Helper function for intializing platform_xx members of + * kfd_system_properties. Uses OEM info from the last CPU/APU node. + */ +static void kfd_update_system_properties(void) +{ + struct kfd_topology_device *dev; + + down_read(&topology_lock); + dev = list_last_entry(&topology_device_list, + struct kfd_topology_device, list); + if (dev) { + sys_props.platform_id = + (*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK; + sys_props.platform_oem = *((uint64_t *)dev->oem_table_id); + sys_props.platform_rev = dev->oem_revision; + } + up_read(&topology_lock); +} + +static void find_system_memory(const struct dmi_header *dm, + void *private) +{ + struct kfd_mem_properties *mem; + u16 mem_width, mem_clock; + struct kfd_topology_device *kdev = + (struct kfd_topology_device *)private; + const u8 *dmi_data = (const u8 *)(dm + 1); + + if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) { + mem_width = (u16)(*(const u16 *)(dmi_data + 0x6)); + mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11)); + list_for_each_entry(mem, &kdev->mem_props, list) { + if (mem_width != 0xFFFF && mem_width != 0) + mem->width = mem_width; + if (mem_clock != 0) + mem->mem_clk_max = mem_clock; + } + } +} + +/* + * Performance counters information is not part of CRAT but we would like to + * put them in the sysfs under topology directory for Thunk to get the data. + * This function is called before updating the sysfs. + */ +static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) +{ + struct kfd_perf_properties *props; + + if (amd_iommu_pc_supported()) { + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + strcpy(props->block_name, "iommu"); + props->max_concurrent = amd_iommu_pc_get_max_banks(0) * + amd_iommu_pc_get_max_counters(0); /* assume one iommu */ + list_add_tail(&props->list, &kdev->perf_props); + } + + return 0; +} + +/* kfd_add_non_crat_information - Add information that is not currently + * defined in CRAT but is necessary for KFD topology + * @dev - topology device to which addition info is added + */ +static void kfd_add_non_crat_information(struct kfd_topology_device *kdev) +{ + /* Check if CPU only node. */ + if (!kdev->gpu) { + /* Add system memory information */ + dmi_walk(find_system_memory, kdev); + } + /* TODO: For GPU node, rearrange code from kfd_topology_add_device */ +} + +/* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices. + * Ignore CRAT for all other devices. AMD APU is identified if both CPU + * and GPU cores are present. + * @device_list - topology device list created by parsing ACPI CRAT table. + * @return - TRUE if invalid, FALSE is valid. + */ +static bool kfd_is_acpi_crat_invalid(struct list_head *device_list) +{ + struct kfd_topology_device *dev; + + list_for_each_entry(dev, device_list, list) { + if (dev->node_props.cpu_cores_count && + dev->node_props.simd_count) + return false; + } + pr_info("Ignoring ACPI CRAT on non-APU system\n"); + return true; +} + int kfd_topology_init(void) { void *crat_image = NULL; size_t image_size = 0; int ret; - - /* - * Initialize the head for the topology device list + struct list_head temp_topology_device_list; + int cpu_only_node = 0; + struct kfd_topology_device *kdev; + int proximity_domain; + + /* topology_device_list - Master list of all topology devices + * temp_topology_device_list - temporary list created while parsing CRAT + * or VCRAT. Once parsing is complete the contents of list is moved to + * topology_device_list */ + + /* Initialize the head for the both the lists */ INIT_LIST_HEAD(&topology_device_list); + INIT_LIST_HEAD(&temp_topology_device_list); init_rwsem(&topology_lock); - topology_crat_parsed = 0; memset(&sys_props, 0, sizeof(sys_props)); + /* Proximity domains in ACPI CRAT tables start counting at + * 0. The same should be true for virtual CRAT tables created + * at this stage. GPUs added later in kfd_topology_add_device + * use a counter. + */ + proximity_domain = 0; + /* - * Get the CRAT image from the ACPI + * Get the CRAT image from the ACPI. If ACPI doesn't have one + * or if ACPI CRAT is invalid create a virtual CRAT. + * NOTE: The current implementation expects all AMD APUs to have + * CRAT. If no CRAT is available, it is assumed to be a CPU */ - ret = kfd_topology_get_crat_acpi(crat_image, &image_size); - if (ret == 0 && image_size > 0) { - pr_info("Found CRAT image with size=%zd\n", image_size); - crat_image = kmalloc(image_size, GFP_KERNEL); - if (!crat_image) { - ret = -ENOMEM; - pr_err("No memory for allocating CRAT image\n"); - goto err; + ret = kfd_create_crat_image_acpi(&crat_image, &image_size); + if (!ret) { + ret = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); + if (ret || + kfd_is_acpi_crat_invalid(&temp_topology_device_list)) { + kfd_release_topology_device_list( + &temp_topology_device_list); + kfd_destroy_crat_image(crat_image); + crat_image = NULL; } - ret = kfd_topology_get_crat_acpi(crat_image, &image_size); - - if (ret == 0) { - down_write(&topology_lock); - ret = kfd_parse_crat_table(crat_image); - if (ret == 0) - ret = kfd_topology_update_sysfs(); - up_write(&topology_lock); - } else { - pr_err("Couldn't get CRAT table size from ACPI\n"); + } + + if (!crat_image) { + ret = kfd_create_crat_image_virtual(&crat_image, &image_size, + COMPUTE_UNIT_CPU, NULL, + proximity_domain); + cpu_only_node = 1; + if (ret) { + pr_err("Error creating VCRAT table for CPU\n"); + return ret; } - kfree(crat_image); - } else if (ret == -ENODATA) { - ret = 0; - } else { - pr_err("Couldn't get CRAT table size from ACPI\n"); + + ret = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); + if (ret) { + pr_err("Error parsing VCRAT table for CPU\n"); + goto err; + } + } + + kdev = list_first_entry(&temp_topology_device_list, + struct kfd_topology_device, list); + kfd_add_perf_to_topology(kdev); + + down_write(&topology_lock); + kfd_topology_update_device_list(&temp_topology_device_list, + &topology_device_list); + atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1); + ret = kfd_topology_update_sysfs(); + up_write(&topology_lock); + + if (!ret) { + sys_props.generation_count++; + kfd_update_system_properties(); + kfd_debug_print_topology(); + pr_info("Finished initializing topology\n"); + } else + pr_err("Failed to update topology in sysfs ret=%d\n", ret); + + /* For nodes with GPU, this information gets added + * when GPU is detected (kfd_topology_add_device). + */ + if (cpu_only_node) { + /* Add additional information to CPU only node created above */ + down_write(&topology_lock); + kdev = list_first_entry(&topology_device_list, + struct kfd_topology_device, list); + up_write(&topology_lock); + kfd_add_non_crat_information(kdev); } err: - pr_info("Finished initializing topology ret=%d\n", ret); + kfd_destroy_crat_image(crat_image); return ret; } void kfd_topology_shutdown(void) { + down_write(&topology_lock); kfd_topology_release_sysfs(); kfd_release_live_view(); -} - -static void kfd_debug_print_topology(void) -{ - struct kfd_topology_device *dev; - uint32_t i = 0; - - pr_info("DEBUG PRINT OF TOPOLOGY:"); - list_for_each_entry(dev, &topology_device_list, list) { - pr_info("Node: %d\n", i); - pr_info("\tGPU assigned: %s\n", (dev->gpu ? "yes" : "no")); - pr_info("\tCPU count: %d\n", dev->node_props.cpu_cores_count); - pr_info("\tSIMD count: %d", dev->node_props.simd_count); - i++; - } + up_write(&topology_lock); } static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) @@ -1072,11 +1042,15 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) uint32_t buf[7]; uint64_t local_mem_size; int i; + struct kfd_local_mem_info local_mem_info; if (!gpu) return 0; - local_mem_size = gpu->kfd2kgd->get_vmem_size(gpu->kgd); + gpu->kfd2kgd->get_local_mem_info(gpu->kgd, &local_mem_info); + + local_mem_size = local_mem_info.local_mem_size_private + + local_mem_info.local_mem_size_public; buf[0] = gpu->pdev->devfn; buf[1] = gpu->pdev->subsystem_vendor; @@ -1091,19 +1065,26 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) return hashout; } - +/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If + * the GPU device is not already present in the topology device + * list then return NULL. This means a new topology device has to + * be created for this GPU. + * TODO: Rather than assiging @gpu to first topology device withtout + * gpu attached, it will better to have more stringent check. + */ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) { struct kfd_topology_device *dev; struct kfd_topology_device *out_dev = NULL; + down_write(&topology_lock); list_for_each_entry(dev, &topology_device_list, list) if (!dev->gpu && (dev->node_props.simd_count > 0)) { dev->gpu = gpu; out_dev = dev; break; } - + up_write(&topology_lock); return out_dev; } @@ -1115,84 +1096,196 @@ static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival) */ } +/* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info, + * patch this after CRAT parsing. + */ +static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) +{ + struct kfd_mem_properties *mem; + struct kfd_local_mem_info local_mem_info; + + if (!dev) + return; + + /* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with + * single bank of VRAM local memory. + * for dGPUs - VCRAT reports only one bank of Local Memory + * for APUs - If CRAT from ACPI reports more than one bank, then + * all the banks will report the same mem_clk_max information + */ + dev->gpu->kfd2kgd->get_local_mem_info(dev->gpu->kgd, + &local_mem_info); + + list_for_each_entry(mem, &dev->mem_props, list) + mem->mem_clk_max = local_mem_info.mem_clk_max; +} + +static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) +{ + struct kfd_iolink_properties *link; + + if (!dev || !dev->gpu) + return; + + /* GPU only creates direck links so apply flags setting to all */ + if (dev->gpu->device_info->asic_family == CHIP_HAWAII) + list_for_each_entry(link, &dev->io_link_props, list) + link->flags = CRAT_IOLINK_FLAGS_ENABLED | + CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT | + CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT; +} + int kfd_topology_add_device(struct kfd_dev *gpu) { uint32_t gpu_id; struct kfd_topology_device *dev; - int res; + struct kfd_cu_info cu_info; + int res = 0; + struct list_head temp_topology_device_list; + void *crat_image = NULL; + size_t image_size = 0; + int proximity_domain; + + INIT_LIST_HEAD(&temp_topology_device_list); gpu_id = kfd_generate_gpu_id(gpu); pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); - down_write(&topology_lock); - /* - * Try to assign the GPU to existing topology device (generated from - * CRAT table + proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); + + /* Check to see if this gpu device exists in the topology_device_list. + * If so, assign the gpu to that device, + * else create a Virtual CRAT for this gpu device and then parse that + * CRAT to create a new topology device. Once created assign the gpu to + * that topology device */ dev = kfd_assign_gpu(gpu); if (!dev) { - pr_info("GPU was not found in the current topology. Extending.\n"); - kfd_debug_print_topology(); - dev = kfd_create_topology_device(); - if (!dev) { - res = -ENOMEM; + res = kfd_create_crat_image_virtual(&crat_image, &image_size, + COMPUTE_UNIT_GPU, gpu, + proximity_domain); + if (res) { + pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", + gpu_id); + return res; + } + res = kfd_parse_crat_table(crat_image, + &temp_topology_device_list, + proximity_domain); + if (res) { + pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", + gpu_id); goto err; } - dev->gpu = gpu; - /* - * TODO: Make a call to retrieve topology information from the - * GPU vBIOS - */ + down_write(&topology_lock); + kfd_topology_update_device_list(&temp_topology_device_list, + &topology_device_list); /* Update the SYSFS tree, since we added another topology * device */ - if (kfd_topology_update_sysfs() < 0) - kfd_topology_release_sysfs(); - + res = kfd_topology_update_sysfs(); + up_write(&topology_lock); + + if (!res) + sys_props.generation_count++; + else + pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n", + gpu_id, res); + dev = kfd_assign_gpu(gpu); + if (WARN_ON(!dev)) { + res = -ENODEV; + goto err; + } } dev->gpu_id = gpu_id; gpu->id = gpu_id; + + /* TODO: Move the following lines to function + * kfd_add_non_crat_information + */ + + /* Fill-in additional information that is not available in CRAT but + * needed for the topology + */ + + dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info); + dev->node_props.simd_arrays_per_engine = + cu_info.num_shader_arrays_per_engine; + dev->node_props.vendor_id = gpu->pdev->vendor; dev->node_props.device_id = gpu->pdev->device; - dev->node_props.location_id = (gpu->pdev->bus->number << 24) + - (gpu->pdev->devfn & 0xffffff); - /* - * TODO: Retrieve max engine clock values from KGD - */ + dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number, + gpu->pdev->devfn); + dev->node_props.max_engine_clk_fcompute = + dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd); + dev->node_props.max_engine_clk_ccompute = + cpufreq_quick_get_max(0) / 1000; + + kfd_fill_mem_clk_max_info(dev); + kfd_fill_iolink_non_crat_info(dev); + + switch (dev->gpu->device_info->asic_family) { + case CHIP_KAVERI: + case CHIP_HAWAII: + case CHIP_TONGA: + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + break; + case CHIP_CARRIZO: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + pr_debug("Adding doorbell packet type capability\n"); + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + break; + default: + WARN(1, "Unexpected ASIC family %u", + dev->gpu->device_info->asic_family); + } + /* Fix errors in CZ CRAT. + * simd_count: Carrizo CRAT reports wrong simd_count, probably + * because it doesn't consider masked out CUs + * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd + * capability flag: Carrizo CRAT doesn't report IOMMU flags + */ if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { - dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE; - pr_info("Adding doorbell packet type capability\n"); + dev->node_props.simd_count = + cu_info.simd_per_cu * cu_info.cu_active_number; + dev->node_props.max_waves_per_simd = 10; + dev->node_props.capability |= HSA_CAP_ATS_PRESENT; } - res = 0; - -err: - up_write(&topology_lock); + kfd_debug_print_topology(); - if (res == 0) + if (!res) kfd_notify_gpu_change(gpu_id, 1); - +err: + kfd_destroy_crat_image(crat_image); return res; } int kfd_topology_remove_device(struct kfd_dev *gpu) { - struct kfd_topology_device *dev; + struct kfd_topology_device *dev, *tmp; uint32_t gpu_id; int res = -ENODEV; down_write(&topology_lock); - list_for_each_entry(dev, &topology_device_list, list) + list_for_each_entry_safe(dev, tmp, &topology_device_list, list) if (dev->gpu == gpu) { gpu_id = dev->gpu_id; kfd_remove_sysfs_node_entry(dev); kfd_release_topology_device(dev); + sys_props.num_devices--; res = 0; if (kfd_topology_update_sysfs() < 0) kfd_topology_release_sysfs(); @@ -1201,28 +1294,32 @@ int kfd_topology_remove_device(struct kfd_dev *gpu) up_write(&topology_lock); - if (res == 0) + if (!res) kfd_notify_gpu_change(gpu_id, 0); return res; } -/* - * When idx is out of bounds, the function will return NULL +/* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD + * topology. If GPU device is found @idx, then valid kfd_dev pointer is + * returned through @kdev + * Return - 0: On success (@kdev will be NULL for non GPU nodes) + * -1: If end of list */ -struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx) +int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev) { struct kfd_topology_device *top_dev; - struct kfd_dev *device = NULL; uint8_t device_idx = 0; + *kdev = NULL; down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) { if (device_idx == idx) { - device = top_dev->gpu; - break; + *kdev = top_dev->gpu; + up_read(&topology_lock); + return 0; } device_idx++; @@ -1230,6 +1327,88 @@ struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx) up_read(&topology_lock); - return device; + return -1; + +} + +static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask) +{ + const struct cpuinfo_x86 *cpuinfo; + int first_cpu_of_numa_node; + + if (!cpumask || cpumask == cpu_none_mask) + return -1; + first_cpu_of_numa_node = cpumask_first(cpumask); + if (first_cpu_of_numa_node >= nr_cpu_ids) + return -1; + cpuinfo = &cpu_data(first_cpu_of_numa_node); + return cpuinfo->apicid; } + +/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor + * of the given NUMA node (numa_node_id) + * Return -1 on failure + */ +int kfd_numa_node_to_apic_id(int numa_node_id) +{ + if (numa_node_id == -1) { + pr_warn("Invalid NUMA Node. Use online CPU mask\n"); + return kfd_cpumask_to_apic_id(cpu_online_mask); + } + return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id)); +} + +#if defined(CONFIG_DEBUG_FS) + +int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data) +{ + struct kfd_topology_device *dev; + unsigned int i = 0; + int r = 0; + + down_read(&topology_lock); + + list_for_each_entry(dev, &topology_device_list, list) { + if (!dev->gpu) { + i++; + continue; + } + + seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); + r = dqm_debugfs_hqds(m, dev->gpu->dqm); + if (r) + break; + } + + up_read(&topology_lock); + + return r; +} + +int kfd_debugfs_rls_by_device(struct seq_file *m, void *data) +{ + struct kfd_topology_device *dev; + unsigned int i = 0; + int r = 0; + + down_read(&topology_lock); + + list_for_each_entry(dev, &topology_device_list, list) { + if (!dev->gpu) { + i++; + continue; + } + + seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id); + r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets); + if (r) + break; + } + + up_read(&topology_lock); + + return r; +} + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index c3ddb9b95ff8de6a52196f581d9eee1c1ccd3cb0..53fca1f4540141d7148e2423f941819d8aac66d7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -39,8 +39,13 @@ #define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080 #define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 #define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 -#define HSA_CAP_RESERVED 0xfffff000 -#define HSA_CAP_DOORBELL_PACKET_TYPE 0x00001000 +#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000 +#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12 +#define HSA_CAP_RESERVED 0xffffc000 + +#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 +#define HSA_CAP_DOORBELL_TYPE_1_0 0x1 +#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 struct kfd_node_properties { uint32_t cpu_cores_count; @@ -91,8 +96,6 @@ struct kfd_mem_properties { struct attribute attr; }; -#define KFD_TOPOLOGY_CPU_SIBLINGS 256 - #define HSA_CACHE_TYPE_DATA 0x00000001 #define HSA_CACHE_TYPE_INSTRUCTION 0x00000002 #define HSA_CACHE_TYPE_CPU 0x00000004 @@ -109,7 +112,7 @@ struct kfd_cache_properties { uint32_t cache_assoc; uint32_t cache_latency; uint32_t cache_type; - uint8_t sibling_map[KFD_TOPOLOGY_CPU_SIBLINGS]; + uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; struct kobject *kobj; struct attribute attr; }; @@ -132,24 +135,36 @@ struct kfd_iolink_properties { struct attribute attr; }; +struct kfd_perf_properties { + struct list_head list; + char block_name[16]; + uint32_t max_concurrent; + struct attribute_group *attr_group; +}; + struct kfd_topology_device { struct list_head list; uint32_t gpu_id; + uint32_t proximity_domain; struct kfd_node_properties node_props; - uint32_t mem_bank_count; struct list_head mem_props; uint32_t cache_count; struct list_head cache_props; uint32_t io_link_count; struct list_head io_link_props; + struct list_head perf_props; struct kfd_dev *gpu; struct kobject *kobj_node; struct kobject *kobj_mem; struct kobject *kobj_cache; struct kobject *kobj_iolink; + struct kobject *kobj_perf; struct attribute attr_gpuid; struct attribute attr_name; struct attribute attr_props; + uint8_t oem_id[CRAT_OEMID_LENGTH]; + uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH]; + uint32_t oem_revision; }; struct kfd_system_properties { @@ -164,6 +179,12 @@ struct kfd_system_properties { struct attribute attr_props; }; +struct kfd_topology_device *kfd_create_topology_device( + struct list_head *device_list); +void kfd_release_topology_device_list(struct list_head *device_list); +extern bool amd_iommu_pc_supported(void); +extern u8 amd_iommu_pc_get_max_banks(u16 devid); +extern u8 amd_iommu_pc_get_max_counters(u16 devid); #endif /* __KFD_TOPOLOGY_H__ */ diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile index 8ba37dd9cf7fce68104ffdd8a637642d4507290a..c27c81cdeed3b815eb4f0630b12a412cb9eabb27 100644 --- a/drivers/gpu/drm/amd/display/Makefile +++ b/drivers/gpu/drm/amd/display/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the DAL (Display Abstract Layer), which is a sub-component # of the AMDGPU drm driver. # It provides the HW control for display related functionalities. diff --git a/drivers/gpu/drm/amd/display/TODO b/drivers/gpu/drm/amd/display/TODO index 46464678f2b317b9c0a588f81a5abe2ceb3ac36a..357d596484016a8f92cd9afd655cece66857b606 100644 --- a/drivers/gpu/drm/amd/display/TODO +++ b/drivers/gpu/drm/amd/display/TODO @@ -105,3 +105,6 @@ useless with filtering output. dynamic debug printing might be an option. 20. Use kernel i2c device to program HDMI retimer. Some boards have an HDMI retimer that we need to program to pass PHY compliance. Currently that's bypassing the i2c device and goes directly to HW. This should be changed. + +21. Remove vector.c from dc/basics. It's used in DDC code which can probably +be simplified enough to no longer need a vector implementation. diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index 4699e47aa76b00969bd0a7a7cc1f074df709563c..2b72009844f8316f92ed9dd2bf9bfac187219520 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'dm' sub-component of DAL. # It provides the control and status of dm blocks. diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ccbf10e3bbb6879dcf2f1cebfd3db1dcc085ec23..1ce4c98385e3a17385a00b076a699cd64462d4dd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -431,9 +431,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.dc = dc_create(&init_data); if (adev->dm.dc) { - DRM_INFO("Display Core initialized!\n"); + DRM_INFO("Display Core initialized with v%s!\n", DC_VER); } else { - DRM_INFO("Display Core failed to initialize!\n"); + DRM_INFO("Display Core failed to initialize with v%s!\n", DC_VER); goto error; } @@ -2351,7 +2351,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, const struct dm_connector_state *dm_state) { struct drm_display_mode *preferred_mode = NULL; - const struct drm_connector *drm_connector; + struct drm_connector *drm_connector; struct dc_stream_state *stream = NULL; struct drm_display_mode mode = *drm_mode; bool native_mode_found = false; @@ -2370,11 +2370,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (!aconnector->dc_sink) { /* - * Exclude MST from creating fake_sink - * TODO: need to enable MST into fake_sink feature + * Create dc_sink when necessary to MST + * Don't apply fake_sink to MST */ - if (aconnector->mst_port) - goto stream_create_fail; + if (aconnector->mst_port) { + dm_dp_mst_dc_sink_create(drm_connector); + goto mst_dc_sink_create_done; + } if (create_fake_sink(aconnector)) goto stream_create_fail; @@ -2425,6 +2427,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, stream_create_fail: dm_state_null: drm_connector_null: +mst_dc_sink_create_done: return stream; } @@ -2725,8 +2728,7 @@ static void create_eml_sink(struct amdgpu_dm_connector *aconnector) }; struct edid *edid; - if (!aconnector->base.edid_blob_ptr || - !aconnector->base.edid_blob_ptr->data) { + if (!aconnector->base.edid_blob_ptr) { DRM_ERROR("No EDID firmware found on connector: %s ,forcing to OFF!\n", aconnector->base.name); @@ -4514,18 +4516,15 @@ static int dm_update_crtcs_state(struct dc *dc, __func__, acrtc->base.base.id); break; } - } - if (enable && dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) && - dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) { - - new_crtc_state->mode_changed = false; - - DRM_DEBUG_DRIVER("Mode change not required, setting mode_changed to %d", - new_crtc_state->mode_changed); + if (dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) && + dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) { + new_crtc_state->mode_changed = false; + DRM_DEBUG_DRIVER("Mode change not required, setting mode_changed to %d", + new_crtc_state->mode_changed); + } } - if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) goto next_crtc; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 8a1e4f5dbd648cbdc58ce8c62c44a55192f89f57..2faa77a7eedaef72246c49e7ae893050a45e06d4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -189,6 +189,8 @@ struct amdgpu_dm_connector { struct mutex hpd_lock; bool fake_enable; + + bool mst_connected; }; #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 707928b88448cf13fb41caaab84d999e31be3891..f3d87f418d2efffd349358fd08b3b52e2a3cca5d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -180,6 +180,42 @@ static int dm_connector_update_modes(struct drm_connector *connector, return drm_add_edid_modes(connector, edid); } +void dm_dp_mst_dc_sink_create(struct drm_connector *connector) +{ + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct edid *edid; + struct dc_sink *dc_sink; + struct dc_sink_init_data init_params = { + .link = aconnector->dc_link, + .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST }; + + edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port); + + if (!edid) { + drm_mode_connector_update_edid_property( + &aconnector->base, + NULL); + return; + } + + aconnector->edid = edid; + + dc_sink = dc_link_add_remote_sink( + aconnector->dc_link, + (uint8_t *)aconnector->edid, + (aconnector->edid->extensions + 1) * EDID_LENGTH, + &init_params); + + dc_sink->priv = aconnector; + aconnector->dc_sink = dc_sink; + + amdgpu_dm_add_sink_to_freesync_module( + connector, aconnector->edid); + + drm_mode_connector_update_edid_property( + &aconnector->base, aconnector->edid); +} + static int dm_dp_mst_get_modes(struct drm_connector *connector) { struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); @@ -306,6 +342,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_mode_connector_set_path_property(connector, pathprop); drm_connector_list_iter_end(&conn_iter); + aconnector->mst_connected = true; return &aconnector->base; } } @@ -358,6 +395,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, */ amdgpu_dm_connector_funcs_reset(connector); + aconnector->mst_connected = true; + DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n", aconnector, connector->base.id, aconnector->mst_port); @@ -389,6 +428,8 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, drm_mode_connector_update_edid_property( &aconnector->base, NULL); + + aconnector->mst_connected = false; } static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) @@ -399,10 +440,18 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) drm_kms_helper_hotplug_event(dev); } +static void dm_dp_mst_link_status_reset(struct drm_connector *connector) +{ + mutex_lock(&connector->dev->mode_config.mutex); + drm_mode_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD); + mutex_unlock(&connector->dev->mode_config.mutex); +} + static void dm_dp_mst_register_connector(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); if (adev->mode_info.rfbdev) drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector); @@ -411,6 +460,8 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector) drm_connector_register(connector); + if (aconnector->mst_connected) + dm_dp_mst_link_status_reset(connector); } static const struct drm_dp_mst_topology_cbs dm_mst_cbs = { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index 2da851b40042aee9b79eb2c666d45c0f5061fee0..8cf51da26657e29e72062b34aeed7e5d827f9e21 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -31,5 +31,6 @@ struct amdgpu_dm_connector; void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *aconnector); +void dm_dp_mst_dc_sink_create(struct drm_connector *connector); #endif diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile index 4f83e3011743f14469401d867f9892a2dbb311bd..aed538a4d1bace016fb37526d099656227b81348 100644 --- a/drivers/gpu/drm/amd/display/dc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for Display Core (dc) component. # diff --git a/drivers/gpu/drm/amd/display/dc/basics/Makefile b/drivers/gpu/drm/amd/display/dc/basics/Makefile index 43c5ccdeeb724e65729b5d93b6aea5cc5cf2b81f..bca33bd9a0d250d55734cc0d440839937dcbd71d 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/Makefile +++ b/drivers/gpu/drm/amd/display/dc/basics/Makefile @@ -1,9 +1,30 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'utils' sub-component of DAL. # It provides the general basic services required by other DAL # subcomponents. -BASICS = conversion.o fixpt31_32.o fixpt32_32.o grph_object_id.o \ +BASICS = conversion.o fixpt31_32.o fixpt32_32.o \ logger.o log_helpers.o vector.o AMD_DAL_BASICS = $(addprefix $(AMDDALPATH)/dc/basics/,$(BASICS)) diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c index 23c9a0ec01817c6a23be8fdbbd5ecbdf5816c585..310964915a83801c9cc914af3d5291a231ab7c38 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c +++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c @@ -46,7 +46,7 @@ uint16_t fixed_point_to_int_frac( arg)); if (d <= (uint16_t)(1 << integer_bits) - (1 / (uint16_t)divisor)) - numerator = (uint16_t)dal_fixed31_32_floor( + numerator = (uint16_t)dal_fixed31_32_round( dal_fixed31_32_mul_int( arg, divisor)); diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c index 26936892c6f59c7702b34351bfa63a4db6dfd643..011a97f82fb625f3c00e042d1a4acc2a0d2e5b89 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c @@ -554,6 +554,22 @@ static inline uint32_t ux_dy( return result | fractional_part; } +static inline uint32_t clamp_ux_dy( + int64_t value, + uint32_t integer_bits, + uint32_t fractional_bits, + uint32_t min_clamp) +{ + uint32_t truncated_val = ux_dy(value, integer_bits, fractional_bits); + + if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) + return (1 << (integer_bits + fractional_bits)) - 1; + else if (truncated_val > min_clamp) + return truncated_val; + else + return min_clamp; +} + uint32_t dal_fixed31_32_u2d19( struct fixed31_32 arg) { @@ -565,3 +581,15 @@ uint32_t dal_fixed31_32_u0d19( { return ux_dy(arg.value, 0, 19); } + +uint32_t dal_fixed31_32_clamp_u0d14( + struct fixed31_32 arg) +{ + return clamp_ux_dy(arg.value, 0, 14, 1); +} + +uint32_t dal_fixed31_32_clamp_u0d10( + struct fixed31_32 arg) +{ + return clamp_ux_dy(arg.value, 0, 10, 1); +} diff --git a/drivers/gpu/drm/amd/display/dc/bios/Makefile b/drivers/gpu/drm/amd/display/dc/bios/Makefile index 6ec815dce9ccc116c330cfebdfbefcc29ae91b20..239e86bbec5a17e318f7d82bad3d607bcbcd43d3 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/Makefile +++ b/drivers/gpu/drm/amd/display/dc/bios/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'bios' sub-component of DAL. # It provides the parsing and executing controls for atom bios image. diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 86e6438c5cf35a6c0f981524454d58af417a183d..c00e405b63e89e8ef93bd23f90fa9596184edc9d 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -190,6 +190,7 @@ static struct graphics_object_id bios_parser_get_connector_id( struct bios_parser *bp = BP_FROM_DCB(dcb); struct graphics_object_id object_id = dal_graphics_object_id_init( 0, ENUM_ID_UNKNOWN, OBJECT_TYPE_UNKNOWN); + uint16_t id; uint32_t connector_table_offset = bp->object_info_tbl_offset + le16_to_cpu(bp->object_info_tbl.v1_1->usConnectorObjectTableOffset); @@ -197,12 +198,19 @@ static struct graphics_object_id bios_parser_get_connector_id( ATOM_OBJECT_TABLE *tbl = GET_IMAGE(ATOM_OBJECT_TABLE, connector_table_offset); - if (tbl && tbl->ucNumberOfObjects > i) { - const uint16_t id = le16_to_cpu(tbl->asObjects[i].usObjectID); + if (!tbl) { + dm_error("Can't get connector table from atom bios.\n"); + return object_id; + } - object_id = object_id_from_bios_object_id(id); + if (tbl->ucNumberOfObjects <= i) { + dm_error("Can't find connector id %d in connector table of size %d.\n", + i, tbl->ucNumberOfObjects); + return object_id; } + id = le16_to_cpu(tbl->asObjects[i].usObjectID); + object_id = object_id_from_bios_object_id(id); return object_id; } @@ -2254,6 +2262,52 @@ static enum bp_result get_gpio_i2c_info(struct bios_parser *bp, return BP_RESULT_OK; } +static bool dal_graphics_object_id_is_valid(struct graphics_object_id id) +{ + bool rc = true; + + switch (id.type) { + case OBJECT_TYPE_UNKNOWN: + rc = false; + break; + case OBJECT_TYPE_GPU: + case OBJECT_TYPE_ENGINE: + /* do NOT check for id.id == 0 */ + if (id.enum_id == ENUM_ID_UNKNOWN) + rc = false; + break; + default: + if (id.id == 0 || id.enum_id == ENUM_ID_UNKNOWN) + rc = false; + break; + } + + return rc; +} + +static bool dal_graphics_object_id_is_equal( + struct graphics_object_id id1, + struct graphics_object_id id2) +{ + if (false == dal_graphics_object_id_is_valid(id1)) { + dm_output_to_console( + "%s: Warning: comparing invalid object 'id1'!\n", __func__); + return false; + } + + if (false == dal_graphics_object_id_is_valid(id2)) { + dm_output_to_console( + "%s: Warning: comparing invalid object 'id2'!\n", __func__); + return false; + } + + if (id1.id == id2.id && id1.enum_id == id2.enum_id + && id1.type == id2.type) + return true; + + return false; +} + static ATOM_OBJECT *get_bios_object(struct bios_parser *bp, struct graphics_object_id id) { diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 3f7b2dabc2b06c839d43a510585175620bb19aab..4b5fdd577848a059c9e14d4ed22914284499fa9c 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -387,6 +387,7 @@ static void init_transmitter_control(struct bios_parser *bp) bp->cmd_tbl.transmitter_control = transmitter_control_v1_6; break; default: + dm_output_to_console("Don't have transmitter_control for v%d\n", crev); bp->cmd_tbl.transmitter_control = NULL; break; } @@ -910,6 +911,8 @@ static void init_set_pixel_clock(struct bios_parser *bp) bp->cmd_tbl.set_pixel_clock = set_pixel_clock_v7; break; default: + dm_output_to_console("Don't have set_pixel_clock for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(SetPixelClock)); bp->cmd_tbl.set_pixel_clock = NULL; break; } @@ -1227,6 +1230,8 @@ static void init_enable_spread_spectrum_on_ppll(struct bios_parser *bp) enable_spread_spectrum_on_ppll_v3; break; default: + dm_output_to_console("Don't have enable_spread_spectrum_on_ppll for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(EnableSpreadSpectrumOnPPLL)); bp->cmd_tbl.enable_spread_spectrum_on_ppll = NULL; break; } @@ -1422,6 +1427,8 @@ static void init_adjust_display_pll(struct bios_parser *bp) bp->cmd_tbl.adjust_display_pll = adjust_display_pll_v3; break; default: + dm_output_to_console("Don't have adjust_display_pll for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(AdjustDisplayPll)); bp->cmd_tbl.adjust_display_pll = NULL; break; } @@ -1695,6 +1702,8 @@ static void init_set_crtc_timing(struct bios_parser *bp) set_crtc_using_dtd_timing_v3; break; default: + dm_output_to_console("Don't have set_crtc_timing for dtd v%d\n", + dtd_version); bp->cmd_tbl.set_crtc_timing = NULL; break; } @@ -1704,6 +1713,8 @@ static void init_set_crtc_timing(struct bios_parser *bp) bp->cmd_tbl.set_crtc_timing = set_crtc_timing_v1; break; default: + dm_output_to_console("Don't have set_crtc_timing for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(SetCRTC_Timing)); bp->cmd_tbl.set_crtc_timing = NULL; break; } @@ -1890,6 +1901,8 @@ static void init_select_crtc_source(struct bios_parser *bp) bp->cmd_tbl.select_crtc_source = select_crtc_source_v3; break; default: + dm_output_to_console("Don't select_crtc_source enable_crtc for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(SelectCRTC_Source)); bp->cmd_tbl.select_crtc_source = NULL; break; } @@ -1997,6 +2010,8 @@ static void init_enable_crtc(struct bios_parser *bp) bp->cmd_tbl.enable_crtc = enable_crtc_v1; break; default: + dm_output_to_console("Don't have enable_crtc for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(EnableCRTC)); bp->cmd_tbl.enable_crtc = NULL; break; } @@ -2103,6 +2118,8 @@ static void init_program_clock(struct bios_parser *bp) bp->cmd_tbl.program_clock = program_clock_v6; break; default: + dm_output_to_console("Don't have program_clock for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(SetPixelClock)); bp->cmd_tbl.program_clock = NULL; break; } @@ -2324,6 +2341,8 @@ static void init_enable_disp_power_gating( enable_disp_power_gating_v2_1; break; default: + dm_output_to_console("Don't enable_disp_power_gating enable_crtc for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(EnableDispPowerGating)); bp->cmd_tbl.enable_disp_power_gating = NULL; break; } @@ -2371,6 +2390,8 @@ static void init_set_dce_clock(struct bios_parser *bp) bp->cmd_tbl.set_dce_clock = set_dce_clock_v2_1; break; default: + dm_output_to_console("Don't have set_dce_clock for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(SetDCEClock)); bp->cmd_tbl.set_dce_clock = NULL; break; } diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index ba68693758a795c889765d17444b6eb232805611..fea5e83736fd9cc42a5b8604ec1f9c6807e94abd 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -118,6 +118,7 @@ static void init_dig_encoder_control(struct bios_parser *bp) bp->cmd_tbl.dig_encoder_control = encoder_control_digx_v1_5; break; default: + dm_output_to_console("Don't have dig_encoder_control for v%d\n", version); bp->cmd_tbl.dig_encoder_control = NULL; break; } @@ -205,6 +206,7 @@ static void init_transmitter_control(struct bios_parser *bp) bp->cmd_tbl.transmitter_control = transmitter_control_v1_6; break; default: + dm_output_to_console("Don't have transmitter_control for v%d\n", crev); bp->cmd_tbl.transmitter_control = NULL; break; } @@ -268,6 +270,8 @@ static void init_set_pixel_clock(struct bios_parser *bp) bp->cmd_tbl.set_pixel_clock = set_pixel_clock_v7; break; default: + dm_output_to_console("Don't have set_pixel_clock for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(setpixelclock)); bp->cmd_tbl.set_pixel_clock = NULL; break; } @@ -379,6 +383,7 @@ static void init_set_crtc_timing(struct bios_parser *bp) set_crtc_using_dtd_timing_v3; break; default: + dm_output_to_console("Don't have set_crtc_timing for v%d\n", dtd_version); bp->cmd_tbl.set_crtc_timing = NULL; break; } @@ -498,6 +503,8 @@ static void init_select_crtc_source(struct bios_parser *bp) bp->cmd_tbl.select_crtc_source = select_crtc_source_v3; break; default: + dm_output_to_console("Don't select_crtc_source enable_crtc for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(selectcrtc_source)); bp->cmd_tbl.select_crtc_source = NULL; break; } @@ -565,6 +572,8 @@ static void init_enable_crtc(struct bios_parser *bp) bp->cmd_tbl.enable_crtc = enable_crtc_v1; break; default: + dm_output_to_console("Don't have enable_crtc for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(enablecrtc)); bp->cmd_tbl.enable_crtc = NULL; break; } @@ -661,6 +670,8 @@ static void init_enable_disp_power_gating( enable_disp_power_gating_v2_1; break; default: + dm_output_to_console("Don't enable_disp_power_gating enable_crtc for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(enabledisppowergating)); bp->cmd_tbl.enable_disp_power_gating = NULL; break; } @@ -710,6 +721,8 @@ static void init_set_dce_clock(struct bios_parser *bp) bp->cmd_tbl.set_dce_clock = set_dce_clock_v2_1; break; default: + dm_output_to_console("Don't have set_dce_clock for v%d\n", + BIOS_CMD_TABLE_PARA_REVISION(setdceclock)); bp->cmd_tbl.set_dce_clock = NULL; break; } diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 41ef35995b029e5586e404a4b8632ef689d315c7..7959e382ed28f0b4aa833edace0ec3938dbefe34 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'calcs' sub-component of DAL. # It calculates Bandwidth and Watermarks values for HW programming # diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c index 6347712db834b76af9dc8a0069884f8fff64a907..2e11fac2a63ddfac42eeb726cc66e27f3541e41d 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c @@ -29,6 +29,15 @@ #include "core_types.h" #include "dal_asic_id.h" +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + /******************************************************************************* * Private Functions ******************************************************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c index 626f9cf8aad282f49af70516b612397705b54ab8..5e2ea12fbb731f71da889ca3e8338d008d377385 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.c @@ -27,6 +27,15 @@ #include "dcn_calc_auto.h" #include "dcn_calc_math.h" +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + /*REVISION#250*/ void scaler_settings_calculation(struct dcn_bw_internal_vars *v) { @@ -773,11 +782,11 @@ void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v) v->dst_y_after_scaler = 0.0; } v->time_calc = 24.0 / v->projected_dcfclk_deep_sleep; - v->v_update_offset[k] =dcn_bw_ceil2(v->htotal[k] / 4.0, 1.0); + v->v_update_offset[k][j] = dcn_bw_ceil2(v->htotal[k] / 4.0, 1.0); v->total_repeater_delay = v->max_inter_dcn_tile_repeaters * (2.0 / (v->required_dispclk[i][j] / (j + 1)) + 3.0 / v->required_dispclk[i][j]); - v->v_update_width[k] = (14.0 / v->projected_dcfclk_deep_sleep + 12.0 / (v->required_dispclk[i][j] / (j + 1)) + v->total_repeater_delay) * v->pixel_clock[k]; - v->v_ready_offset[k] =dcn_bw_max2(150.0 / (v->required_dispclk[i][j] / (j + 1)), v->total_repeater_delay + 20.0 / v->projected_dcfclk_deep_sleep + 10.0 / (v->required_dispclk[i][j] / (j + 1))) * v->pixel_clock[k]; - v->time_setup = (v->v_update_offset[k] + v->v_update_width[k] + v->v_ready_offset[k]) / v->pixel_clock[k]; + v->v_update_width[k][j] = (14.0 / v->projected_dcfclk_deep_sleep + 12.0 / (v->required_dispclk[i][j] / (j + 1)) + v->total_repeater_delay) * v->pixel_clock[k]; + v->v_ready_offset[k][j] = dcn_bw_max2(150.0 / (v->required_dispclk[i][j] / (j + 1)), v->total_repeater_delay + 20.0 / v->projected_dcfclk_deep_sleep + 10.0 / (v->required_dispclk[i][j] / (j + 1))) * v->pixel_clock[k]; + v->time_setup = (v->v_update_offset[k][j] + v->v_update_width[k][j] + v->v_ready_offset[k][j]) / v->pixel_clock[k]; v->extra_latency = v->urgent_round_trip_and_out_of_order_latency_per_state[i] + (v->total_number_of_active_dpp[i][j] * v->pixel_chunk_size_in_kbyte + v->total_number_of_dcc_active_dpp[i][j] * v->meta_chunk_size) * 1024.0 / v->return_bw_per_state[i]; if (v->pte_enable == dcn_bw_yes) { v->extra_latency = v->extra_latency + v->total_number_of_active_dpp[i][j] * v->pte_chunk_size * 1024.0 / v->return_bw_per_state[i]; diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.c index b6abe0f3bb152e3b89966d09988bccc3bb840ff5..7600a4a4abc7fd8bd03c707f6df8bf9d9b9aa3a7 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.c @@ -25,37 +25,44 @@ #include "dcn_calc_math.h" +#define isNaN(number) ((number) != (number)) + +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + float dcn_bw_mod(const float arg1, const float arg2) { - if (arg1 != arg1) + if (isNaN(arg1)) return arg2; - if (arg2 != arg2) + if (isNaN(arg2)) return arg1; return arg1 - arg1 * ((int) (arg1 / arg2)); } float dcn_bw_min2(const float arg1, const float arg2) { - if (arg1 != arg1) + if (isNaN(arg1)) return arg2; - if (arg2 != arg2) + if (isNaN(arg2)) return arg1; return arg1 < arg2 ? arg1 : arg2; } unsigned int dcn_bw_max(const unsigned int arg1, const unsigned int arg2) { - if (arg1 != arg1) - return arg2; - if (arg2 != arg2) - return arg1; return arg1 > arg2 ? arg1 : arg2; } float dcn_bw_max2(const float arg1, const float arg2) { - if (arg1 != arg1) + if (isNaN(arg1)) return arg2; - if (arg2 != arg2) + if (isNaN(arg2)) return arg1; return arg1 > arg2 ? arg1 : arg2; } diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index a4fbca34bcdf873283ef34e19202d85211daa772..331891c2c71a2aacd39221ef62fce6f4cf4604c9 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -33,6 +33,15 @@ #include "dcn10/dcn10_resource.h" #include "dcn_calc_math.h" +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + /* Defaults from spreadsheet rev#247 */ const struct dcn_soc_bounding_box dcn10_soc_defaults = { /* latencies */ @@ -878,6 +887,17 @@ bool dcn_validate_bandwidth( + pipe->bottom_pipe->plane_res.scl_data.recout.width; } + if (pipe->plane_state->rotation % 2 == 0) { + ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value + || v->scaler_rec_out_width[input_idx] == v->viewport_width[input_idx]); + ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value + || v->scaler_recout_height[input_idx] == v->viewport_height[input_idx]); + } else { + ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value + || v->scaler_recout_height[input_idx] == v->viewport_width[input_idx]); + ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value + || v->scaler_rec_out_width[input_idx] == v->viewport_height[input_idx]); + } v->dcc_enable[input_idx] = pipe->plane_state->dcc.enable ? dcn_bw_yes : dcn_bw_no; v->source_pixel_format[input_idx] = tl_pixel_format_to_bw_defs( pipe->plane_state->format); @@ -888,6 +908,15 @@ bool dcn_validate_bandwidth( v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps; v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c; v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c; + /* + * Spreadsheet doesn't handle taps_c is one properly, + * need to force Chroma to always be scaled to pass + * bandwidth validation. + */ + if (v->override_hta_pschroma[input_idx] == 1) + v->override_hta_pschroma[input_idx] = 2; + if (v->override_vta_pschroma[input_idx] == 1) + v->override_vta_pschroma[input_idx] = 2; v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor; } if (v->is_line_buffer_bpp_fixed == dcn_bw_yes) @@ -985,9 +1014,9 @@ bool dcn_validate_bandwidth( if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) continue; - pipe->pipe_dlg_param.vupdate_width = v->v_update_width[input_idx]; - pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset[input_idx]; - pipe->pipe_dlg_param.vready_offset = v->v_ready_offset[input_idx]; + pipe->pipe_dlg_param.vupdate_width = v->v_update_width[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0]; + pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0]; + pipe->pipe_dlg_param.vready_offset = v->v_ready_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0]; pipe->pipe_dlg_param.vstartup_start = v->v_startup[input_idx]; pipe->pipe_dlg_param.htotal = pipe->stream->timing.h_total; @@ -1026,9 +1055,9 @@ bool dcn_validate_bandwidth( TIMING_3D_FORMAT_SIDE_BY_SIDE))) { if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) { /* update previously split pipe */ - hsplit_pipe->pipe_dlg_param.vupdate_width = v->v_update_width[input_idx]; - hsplit_pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset[input_idx]; - hsplit_pipe->pipe_dlg_param.vready_offset = v->v_ready_offset[input_idx]; + hsplit_pipe->pipe_dlg_param.vupdate_width = v->v_update_width[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0]; + hsplit_pipe->pipe_dlg_param.vupdate_offset = v->v_update_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0]; + hsplit_pipe->pipe_dlg_param.vready_offset = v->v_ready_offset[input_idx][v->dpp_per_plane[input_idx] == 2 ? 1 : 0]; hsplit_pipe->pipe_dlg_param.vstartup_start = v->v_startup[input_idx]; hsplit_pipe->pipe_dlg_param.htotal = pipe->stream->timing.h_total; @@ -1556,35 +1585,6 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc) dc->dcn_ip->can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one, dc->dcn_ip->bug_forcing_luma_and_chroma_request_to_same_size_fixed, dc->dcn_ip->dcfclk_cstate_latency); - dc->dml.soc.vmin.socclk_mhz = dc->dcn_soc->socclk; - dc->dml.soc.vmid.socclk_mhz = dc->dcn_soc->socclk; - dc->dml.soc.vnom.socclk_mhz = dc->dcn_soc->socclk; - dc->dml.soc.vmax.socclk_mhz = dc->dcn_soc->socclk; - - dc->dml.soc.vmin.dcfclk_mhz = dc->dcn_soc->dcfclkv_min0p65; - dc->dml.soc.vmid.dcfclk_mhz = dc->dcn_soc->dcfclkv_mid0p72; - dc->dml.soc.vnom.dcfclk_mhz = dc->dcn_soc->dcfclkv_nom0p8; - dc->dml.soc.vmax.dcfclk_mhz = dc->dcn_soc->dcfclkv_max0p9; - - dc->dml.soc.vmin.dispclk_mhz = dc->dcn_soc->max_dispclk_vmin0p65; - dc->dml.soc.vmid.dispclk_mhz = dc->dcn_soc->max_dispclk_vmid0p72; - dc->dml.soc.vnom.dispclk_mhz = dc->dcn_soc->max_dispclk_vnom0p8; - dc->dml.soc.vmax.dispclk_mhz = dc->dcn_soc->max_dispclk_vmax0p9; - - dc->dml.soc.vmin.dppclk_mhz = dc->dcn_soc->max_dppclk_vmin0p65; - dc->dml.soc.vmid.dppclk_mhz = dc->dcn_soc->max_dppclk_vmid0p72; - dc->dml.soc.vnom.dppclk_mhz = dc->dcn_soc->max_dppclk_vnom0p8; - dc->dml.soc.vmax.dppclk_mhz = dc->dcn_soc->max_dppclk_vmax0p9; - - dc->dml.soc.vmin.phyclk_mhz = dc->dcn_soc->phyclkv_min0p65; - dc->dml.soc.vmid.phyclk_mhz = dc->dcn_soc->phyclkv_mid0p72; - dc->dml.soc.vnom.phyclk_mhz = dc->dcn_soc->phyclkv_nom0p8; - dc->dml.soc.vmax.phyclk_mhz = dc->dcn_soc->phyclkv_max0p9; - - dc->dml.soc.vmin.dram_bw_per_chan_gbps = dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65; - dc->dml.soc.vmid.dram_bw_per_chan_gbps = dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72; - dc->dml.soc.vnom.dram_bw_per_chan_gbps = dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8; - dc->dml.soc.vmax.dram_bw_per_chan_gbps = dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9; dc->dml.soc.sr_exit_time_us = dc->dcn_soc->sr_exit_time; dc->dml.soc.sr_enter_plus_exit_time_us = dc->dcn_soc->sr_enter_plus_exit_time; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index d1488d5ee02816488155680f7b853877aef7167b..35e84ed031de08f5edfadbfb71e550291ae8ebef 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -283,19 +283,17 @@ static bool construct(struct dc *dc, const struct dc_init_data *init_params) { struct dal_logger *logger; - struct dc_context *dc_ctx = kzalloc(sizeof(*dc_ctx), GFP_KERNEL); - struct bw_calcs_dceip *dc_dceip = kzalloc(sizeof(*dc_dceip), - GFP_KERNEL); - struct bw_calcs_vbios *dc_vbios = kzalloc(sizeof(*dc_vbios), - GFP_KERNEL); + struct dc_context *dc_ctx; + struct bw_calcs_dceip *dc_dceip; + struct bw_calcs_vbios *dc_vbios; #ifdef CONFIG_DRM_AMD_DC_DCN1_0 - struct dcn_soc_bounding_box *dcn_soc = kzalloc(sizeof(*dcn_soc), - GFP_KERNEL); - struct dcn_ip_params *dcn_ip = kzalloc(sizeof(*dcn_ip), GFP_KERNEL); + struct dcn_soc_bounding_box *dcn_soc; + struct dcn_ip_params *dcn_ip; #endif enum dce_version dc_version = DCE_VERSION_UNKNOWN; + dc_dceip = kzalloc(sizeof(*dc_dceip), GFP_KERNEL); if (!dc_dceip) { dm_error("%s: failed to create dceip\n", __func__); goto fail; @@ -303,6 +301,7 @@ static bool construct(struct dc *dc, dc->bw_dceip = dc_dceip; + dc_vbios = kzalloc(sizeof(*dc_vbios), GFP_KERNEL); if (!dc_vbios) { dm_error("%s: failed to create vbios\n", __func__); goto fail; @@ -310,6 +309,7 @@ static bool construct(struct dc *dc, dc->bw_vbios = dc_vbios; #ifdef CONFIG_DRM_AMD_DC_DCN1_0 + dcn_soc = kzalloc(sizeof(*dcn_soc), GFP_KERNEL); if (!dcn_soc) { dm_error("%s: failed to create dcn_soc\n", __func__); goto fail; @@ -317,6 +317,7 @@ static bool construct(struct dc *dc, dc->dcn_soc = dcn_soc; + dcn_ip = kzalloc(sizeof(*dcn_ip), GFP_KERNEL); if (!dcn_ip) { dm_error("%s: failed to create dcn_ip\n", __func__); goto fail; @@ -325,11 +326,18 @@ static bool construct(struct dc *dc, dc->dcn_ip = dcn_ip; #endif + dc_ctx = kzalloc(sizeof(*dc_ctx), GFP_KERNEL); if (!dc_ctx) { dm_error("%s: failed to create ctx\n", __func__); goto fail; } + dc_ctx->cgs_device = init_params->cgs_device; + dc_ctx->driver_context = init_params->driver; + dc_ctx->dc = dc; + dc_ctx->asic_id = init_params->asic_id; + dc->ctx = dc_ctx; + dc->current_state = dc_create_state(); if (!dc->current_state) { @@ -337,11 +345,6 @@ static bool construct(struct dc *dc, goto fail; } - dc_ctx->cgs_device = init_params->cgs_device; - dc_ctx->driver_context = init_params->driver; - dc_ctx->dc = dc; - dc_ctx->asic_id = init_params->asic_id; - /* Create logger */ logger = dal_logger_create(dc_ctx, init_params->log_mask); @@ -351,11 +354,10 @@ static bool construct(struct dc *dc, goto fail; } dc_ctx->logger = logger; - dc->ctx = dc_ctx; - dc->ctx->dce_environment = init_params->dce_environment; + dc_ctx->dce_environment = init_params->dce_environment; dc_version = resource_parse_asic_id(init_params->asic_id); - dc->ctx->dce_version = dc_version; + dc_ctx->dce_version = dc_version; #if defined(CONFIG_DRM_AMD_DC_FBC) dc->ctx->fbc_gpu_addr = init_params->fbc_gpu_addr; @@ -578,7 +580,7 @@ static void program_timing_sync( for (j = 0; j < group_size; j++) { struct pipe_ctx *temp; - if (!pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg)) { + if (pipe_set[j]->stream_res.tg->funcs->is_blanked && !pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg)) { if (j == 0) break; @@ -591,7 +593,7 @@ static void program_timing_sync( /* remove any other unblanked pipes as they have already been synced */ for (j = j + 1; j < group_size; j++) { - if (!pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg)) { + if (pipe_set[j]->stream_res.tg->funcs->is_blanked && !pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg)) { group_size--; pipe_set[j] = pipe_set[group_size]; j--; @@ -786,6 +788,8 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc) dc->hwss.disable_plane(dc, &context->res_ctx.pipe_ctx[i]); } + dc->optimized_required = false; + /* 3rd param should be true, temp w/a for RV*/ #if defined(CONFIG_DRM_AMD_DC_DCN1_0) dc->hwss.set_bandwidth(dc, context, dc->ctx->dce_version < DCN_VERSION_1_0); @@ -981,6 +985,11 @@ static enum surface_update_type get_plane_info_update_type(const struct dc_surfa if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) update_flags->bits.per_pixel_alpha_change = 1; + if (u->plane_info->dcc.enable != u->surface->dcc.enable + || u->plane_info->dcc.grph.independent_64b_blks != u->surface->dcc.grph.independent_64b_blks + || u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch) + update_flags->bits.dcc_change = 1; + if (pixel_format_to_bpp(u->plane_info->format) != pixel_format_to_bpp(u->surface->format)) /* different bytes per element will require full bandwidth @@ -1178,12 +1187,6 @@ static void commit_planes_for_stream(struct dc *dc, if (update_type == UPDATE_TYPE_FULL) { dc->hwss.set_bandwidth(dc, context, false); context_clock_trace(dc, context); - - for (j = 0; j < dc->res_pool->pipe_count; j++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j]; - - dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe_ctx); - } } if (surface_count == 0) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index 2e509382935fecf7366f0d086c030d061071f008..1babac07bcc9eaee38651e179099d30f660996b4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -1,3 +1,25 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ /* * dc_debug.c * diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 71993d5983bfa907182f14148709eda4cfeefacd..ebc96b7200835ede9c64202ea0c7a44e69c9edac 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -28,6 +28,8 @@ #include "timing_generator.h" #include "hw_sequencer.h" +#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) + /* used as index in array of black_color_format */ enum black_color_format { BLACK_COLOR_FORMAT_RGB_FULLRANGE = 0, @@ -38,6 +40,15 @@ enum black_color_format { BLACK_COLOR_FORMAT_DEBUG, }; +enum dc_color_space_type { + COLOR_SPACE_RGB_TYPE, + COLOR_SPACE_RGB_LIMITED_TYPE, + COLOR_SPACE_YCBCR601_TYPE, + COLOR_SPACE_YCBCR709_TYPE, + COLOR_SPACE_YCBCR601_LIMITED_TYPE, + COLOR_SPACE_YCBCR709_LIMITED_TYPE +}; + static const struct tg_color black_color_format[] = { /* BlackColorFormat_RGB_FullRange */ {0, 0, 0}, @@ -53,6 +64,140 @@ static const struct tg_color black_color_format[] = { {0xff, 0xff, 0}, }; +struct out_csc_color_matrix_type { + enum dc_color_space_type color_space_type; + uint16_t regval[12]; +}; + +static const struct out_csc_color_matrix_type output_csc_matrix[] = { + { COLOR_SPACE_RGB_TYPE, + { 0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, + { COLOR_SPACE_RGB_LIMITED_TYPE, + { 0x1B67, 0, 0, 0x201, 0, 0x1B67, 0, 0x201, 0, 0, 0x1B67, 0x201} }, + { COLOR_SPACE_YCBCR601_TYPE, + { 0xE04, 0xF444, 0xFDB9, 0x1004, 0x831, 0x1016, 0x320, 0x201, 0xFB45, + 0xF6B7, 0xE04, 0x1004} }, + { COLOR_SPACE_YCBCR709_TYPE, + { 0xE04, 0xF345, 0xFEB7, 0x1004, 0x5D3, 0x1399, 0x1FA, + 0x201, 0xFCCA, 0xF533, 0xE04, 0x1004} }, + + /* TODO: correct values below */ + { COLOR_SPACE_YCBCR601_LIMITED_TYPE, + { 0xE00, 0xF447, 0xFDB9, 0x1000, 0x991, + 0x12C9, 0x3A6, 0x200, 0xFB47, 0xF6B9, 0xE00, 0x1000} }, + { COLOR_SPACE_YCBCR709_LIMITED_TYPE, + { 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3, + 0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} }, +}; + +static bool is_rgb_type( + enum dc_color_space color_space) +{ + bool ret = false; + + if (color_space == COLOR_SPACE_SRGB || + color_space == COLOR_SPACE_XR_RGB || + color_space == COLOR_SPACE_MSREF_SCRGB || + color_space == COLOR_SPACE_2020_RGB_FULLRANGE || + color_space == COLOR_SPACE_ADOBERGB || + color_space == COLOR_SPACE_DCIP3 || + color_space == COLOR_SPACE_DOLBYVISION) + ret = true; + return ret; +} + +static bool is_rgb_limited_type( + enum dc_color_space color_space) +{ + bool ret = false; + + if (color_space == COLOR_SPACE_SRGB_LIMITED || + color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE) + ret = true; + return ret; +} + +static bool is_ycbcr601_type( + enum dc_color_space color_space) +{ + bool ret = false; + + if (color_space == COLOR_SPACE_YCBCR601 || + color_space == COLOR_SPACE_XV_YCC_601) + ret = true; + return ret; +} + +static bool is_ycbcr601_limited_type( + enum dc_color_space color_space) +{ + bool ret = false; + + if (color_space == COLOR_SPACE_YCBCR601_LIMITED) + ret = true; + return ret; +} + +static bool is_ycbcr709_type( + enum dc_color_space color_space) +{ + bool ret = false; + + if (color_space == COLOR_SPACE_YCBCR709 || + color_space == COLOR_SPACE_XV_YCC_709) + ret = true; + return ret; +} + +static bool is_ycbcr709_limited_type( + enum dc_color_space color_space) +{ + bool ret = false; + + if (color_space == COLOR_SPACE_YCBCR709_LIMITED) + ret = true; + return ret; +} +enum dc_color_space_type get_color_space_type(enum dc_color_space color_space) +{ + enum dc_color_space_type type = COLOR_SPACE_RGB_TYPE; + + if (is_rgb_type(color_space)) + type = COLOR_SPACE_RGB_TYPE; + else if (is_rgb_limited_type(color_space)) + type = COLOR_SPACE_RGB_LIMITED_TYPE; + else if (is_ycbcr601_type(color_space)) + type = COLOR_SPACE_YCBCR601_TYPE; + else if (is_ycbcr709_type(color_space)) + type = COLOR_SPACE_YCBCR709_TYPE; + else if (is_ycbcr601_limited_type(color_space)) + type = COLOR_SPACE_YCBCR601_LIMITED_TYPE; + else if (is_ycbcr709_limited_type(color_space)) + type = COLOR_SPACE_YCBCR709_LIMITED_TYPE; + + return type; +} + +const uint16_t *find_color_matrix(enum dc_color_space color_space, + uint32_t *array_size) +{ + int i; + enum dc_color_space_type type; + const uint16_t *val = NULL; + int arr_size = NUM_ELEMENTS(output_csc_matrix); + + type = get_color_space_type(color_space); + for (i = 0; i < arr_size; i++) + if (output_csc_matrix[i].color_space_type == type) { + val = output_csc_matrix[i].regval; + *array_size = 12; + break; + } + + return val; +} + + void color_space_to_black_color( const struct dc *dc, enum dc_color_space colorspace, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 7b0e43c0685cc56044d4e36d3f1d8e6d8ecb55c2..a3742827157361ac0c3c57733eb845cd34ea4720 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -938,8 +938,9 @@ static bool construct( link->link_id = bios->funcs->get_connector_id(bios, init_params->connector_index); if (link->link_id.type != OBJECT_TYPE_CONNECTOR) { - dm_error("%s: Invalid Connector ObjectID from Adapter Service for connector index:%d!\n", - __func__, init_params->connector_index); + dm_error("%s: Invalid Connector ObjectID from Adapter Service for connector index:%d! type %d expected %d\n", + __func__, init_params->connector_index, + link->link_id.type, OBJECT_TYPE_CONNECTOR); goto create_fail; } @@ -1271,6 +1272,24 @@ static enum dc_status enable_link_dp( return status; } +static enum dc_status enable_link_edp( + struct dc_state *state, + struct pipe_ctx *pipe_ctx) +{ + enum dc_status status; + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->sink->link; + + link->dc->hwss.edp_power_control(link, true); + link->dc->hwss.edp_wait_for_hpd_ready(link, true); + + status = enable_link_dp(state, pipe_ctx); + + link->dc->hwss.edp_backlight_control(link, true); + + return status; +} + static enum dc_status enable_link_dp_mst( struct dc_state *state, struct pipe_ctx *pipe_ctx) @@ -1746,9 +1765,11 @@ static enum dc_status enable_link( enum dc_status status = DC_ERROR_UNEXPECTED; switch (pipe_ctx->stream->signal) { case SIGNAL_TYPE_DISPLAY_PORT: - case SIGNAL_TYPE_EDP: status = enable_link_dp(state, pipe_ctx); break; + case SIGNAL_TYPE_EDP: + status = enable_link_edp(state, pipe_ctx); + break; case SIGNAL_TYPE_DISPLAY_PORT_MST: status = enable_link_dp_mst(state, pipe_ctx); msleep(200); @@ -1801,7 +1822,7 @@ static void disable_link(struct dc_link *link, enum signal_type signal) link->link_enc->funcs->disable_output(link->link_enc, signal); } -bool dp_active_dongle_validate_timing( +static bool dp_active_dongle_validate_timing( const struct dc_crtc_timing *timing, const struct dc_dongle_caps *dongle_caps) { @@ -1833,6 +1854,8 @@ bool dp_active_dongle_validate_timing( /* Check Color Depth and Pixel Clock */ if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) required_pix_clk /= 2; + else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) + required_pix_clk = required_pix_clk * 2 / 3; switch (timing->display_color_depth) { case COLOR_DEPTH_666: @@ -1907,12 +1930,18 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level, { struct dc *core_dc = link->ctx->dc; struct abm *abm = core_dc->res_pool->abm; + struct dmcu *dmcu = core_dc->res_pool->dmcu; unsigned int controller_id = 0; + bool use_smooth_brightness = true; int i; - if ((abm == NULL) || (abm->funcs->set_backlight_level == NULL)) + if ((dmcu == NULL) || + (abm == NULL) || + (abm->funcs->set_backlight_level == NULL)) return false; + use_smooth_brightness = dmcu->funcs->is_dmcu_initialized(dmcu); + dm_logger_write(link->ctx->logger, LOG_BACKLIGHT, "New Backlight level: %d (0x%X)\n", level, level); @@ -1935,7 +1964,8 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level, abm, level, frame_ramp, - controller_id); + controller_id, + use_smooth_brightness); } return true; @@ -1952,144 +1982,6 @@ bool dc_link_set_psr_enable(const struct dc_link *link, bool enable, bool wait) return true; } -bool dc_link_get_psr_state(const struct dc_link *link, uint32_t *psr_state) -{ - struct dc *core_dc = link->ctx->dc; - struct dmcu *dmcu = core_dc->res_pool->dmcu; - - if (dmcu != NULL && link->psr_enabled) - dmcu->funcs->get_psr_state(dmcu, psr_state); - - return true; -} - -bool dc_link_setup_psr(struct dc_link *link, - const struct dc_stream_state *stream, struct psr_config *psr_config, - struct psr_context *psr_context) -{ - struct dc *core_dc = link->ctx->dc; - struct dmcu *dmcu = core_dc->res_pool->dmcu; - int i; - - psr_context->controllerId = CONTROLLER_ID_UNDEFINED; - - if (link != NULL && - dmcu != NULL) { - /* updateSinkPsrDpcdConfig*/ - union dpcd_psr_configuration psr_configuration; - - memset(&psr_configuration, 0, sizeof(psr_configuration)); - - psr_configuration.bits.ENABLE = 1; - psr_configuration.bits.CRC_VERIFICATION = 1; - psr_configuration.bits.FRAME_CAPTURE_INDICATION = - psr_config->psr_frame_capture_indication_req; - - /* Check for PSR v2*/ - if (psr_config->psr_version == 0x2) { - /* For PSR v2 selective update. - * Indicates whether sink should start capturing - * immediately following active scan line, - * or starting with the 2nd active scan line. - */ - psr_configuration.bits.LINE_CAPTURE_INDICATION = 0; - /*For PSR v2, determines whether Sink should generate - * IRQ_HPD when CRC mismatch is detected. - */ - psr_configuration.bits.IRQ_HPD_WITH_CRC_ERROR = 1; - } - - dm_helpers_dp_write_dpcd( - link->ctx, - link, - 368, - &psr_configuration.raw, - sizeof(psr_configuration.raw)); - - psr_context->channel = link->ddc->ddc_pin->hw_info.ddc_channel; - psr_context->transmitterId = link->link_enc->transmitter; - psr_context->engineId = link->link_enc->preferred_engine; - - for (i = 0; i < MAX_PIPES; i++) { - if (core_dc->current_state->res_ctx.pipe_ctx[i].stream - == stream) { - /* dmcu -1 for all controller id values, - * therefore +1 here - */ - psr_context->controllerId = - core_dc->current_state->res_ctx. - pipe_ctx[i].stream_res.tg->inst + 1; - break; - } - } - - /* Hardcoded for now. Can be Pcie or Uniphy (or Unknown)*/ - psr_context->phyType = PHY_TYPE_UNIPHY; - /*PhyId is associated with the transmitter id*/ - psr_context->smuPhyId = link->link_enc->transmitter; - - psr_context->crtcTimingVerticalTotal = stream->timing.v_total; - psr_context->vsyncRateHz = div64_u64(div64_u64((stream-> - timing.pix_clk_khz * 1000), - stream->timing.v_total), - stream->timing.h_total); - - psr_context->psrSupportedDisplayConfig = true; - psr_context->psrExitLinkTrainingRequired = - psr_config->psr_exit_link_training_required; - psr_context->sdpTransmitLineNumDeadline = - psr_config->psr_sdp_transmit_line_num_deadline; - psr_context->psrFrameCaptureIndicationReq = - psr_config->psr_frame_capture_indication_req; - - psr_context->skipPsrWaitForPllLock = 0; /* only = 1 in KV */ - - psr_context->numberOfControllers = - link->dc->res_pool->res_cap->num_timing_generator; - - psr_context->rfb_update_auto_en = true; - - /* 2 frames before enter PSR. */ - psr_context->timehyst_frames = 2; - /* half a frame - * (units in 100 lines, i.e. a value of 1 represents 100 lines) - */ - psr_context->hyst_lines = stream->timing.v_total / 2 / 100; - psr_context->aux_repeats = 10; - - psr_context->psr_level.u32all = 0; - -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) - /*skip power down the single pipe since it blocks the cstate*/ - if (ASIC_REV_IS_RAVEN(link->ctx->asic_id.hw_internal_rev)) - psr_context->psr_level.bits.SKIP_CRTC_DISABLE = true; -#endif - - /* SMU will perform additional powerdown sequence. - * For unsupported ASICs, set psr_level flag to skip PSR - * static screen notification to SMU. - * (Always set for DAL2, did not check ASIC) - */ - psr_context->psr_level.bits.SKIP_SMU_NOTIFICATION = 1; - - /* Complete PSR entry before aborting to prevent intermittent - * freezes on certain eDPs - */ - psr_context->psr_level.bits.DISABLE_PSR_ENTRY_ABORT = 1; - - /* Controls additional delay after remote frame capture before - * continuing power down, default = 0 - */ - psr_context->frame_delay = 0; - - link->psr_enabled = true; - dmcu->funcs->setup_psr(dmcu, link, psr_context); - return true; - } else - return false; - -} - const struct dc_link_status *dc_link_get_status(const struct dc_link *link) { return &link->link_status; @@ -2418,6 +2310,9 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option) if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) deallocate_mst_payload(pipe_ctx); + if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) + core_dc->hwss.edp_backlight_control(pipe_ctx->stream->sink->link, false); + core_dc->hwss.disable_stream(pipe_ctx, option); disable_link(pipe_ctx->stream->sink->link, pipe_ctx->stream->signal); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 00528b214a9fefa0be61429537670375335bdef8..61e8c3e02d1696bf08f53bb24cefdc074a74cf10 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1470,6 +1470,12 @@ void decide_link_settings(struct dc_stream_state *stream, return; } + /* EDP use the link cap setting */ + if (stream->sink->sink_signal == SIGNAL_TYPE_EDP) { + *link_setting = link->verified_link_cap; + return; + } + /* search for the minimum link setting that: * 1. is supported according to the link training result * 2. could support the b/w requested by the timing diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c index f2902569be2ef92138bfbd186ff7ee7b06bdb888..2096f2a179f2c5e3509ac1c60115049f3a6e9ee5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c @@ -88,15 +88,7 @@ void dp_enable_link_phy( } if (dc_is_dp_sst_signal(signal)) { - if (signal == SIGNAL_TYPE_EDP) { - link->dc->hwss.edp_power_control(link, true); - link_enc->funcs->enable_dp_output( - link_enc, - link_settings, - clock_source); - link->dc->hwss.edp_backlight_control(link, true); - } else - link_enc->funcs->enable_dp_output( + link_enc->funcs->enable_dp_output( link_enc, link_settings, clock_source); @@ -138,7 +130,6 @@ void dp_disable_link_phy(struct dc_link *link, enum signal_type signal) dp_receiver_power_ctrl(link, false); if (signal == SIGNAL_TYPE_EDP) { - link->dc->hwss.edp_backlight_control(link, false); edp_receiver_ready_T9(link); link->link_enc->funcs->disable_output(link->link_enc, signal); link->dc->hwss.edp_power_control(link, false); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 9c5e879f18b36da0c991a7bcf86450ce487bba97..95b8dd0e53c6951a1e28e988fd4cb13dcaf04f0d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1,5 +1,5 @@ /* -* Copyright 2012-15 Advanced Micro Devices, Inc. + * Copyright 2012-15 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -498,26 +498,15 @@ static void calculate_viewport(struct pipe_ctx *pipe_ctx) data->viewport_c.height = (data->viewport.height + vpc_div - 1) / vpc_div; /* Handle hsplit */ - if (pri_split || sec_split) { - /* HMirror XOR Secondary_pipe XOR Rotation_180 */ - bool right_view = (sec_split != plane_state->horizontal_mirror) != - (plane_state->rotation == ROTATION_ANGLE_180); - - if (plane_state->rotation == ROTATION_ANGLE_90 - || plane_state->rotation == ROTATION_ANGLE_270) - /* Secondary_pipe XOR Rotation_270 */ - right_view = (plane_state->rotation == ROTATION_ANGLE_270) != sec_split; - - if (right_view) { - data->viewport.x += data->viewport.width / 2; - data->viewport_c.x += data->viewport_c.width / 2; - /* Ceil offset pipe */ - data->viewport.width = (data->viewport.width + 1) / 2; - data->viewport_c.width = (data->viewport_c.width + 1) / 2; - } else { - data->viewport.width /= 2; - data->viewport_c.width /= 2; - } + if (sec_split) { + data->viewport.x += data->viewport.width / 2; + data->viewport_c.x += data->viewport_c.width / 2; + /* Ceil offset pipe */ + data->viewport.width = (data->viewport.width + 1) / 2; + data->viewport_c.width = (data->viewport_c.width + 1) / 2; + } else if (pri_split) { + data->viewport.width /= 2; + data->viewport_c.width /= 2; } if (plane_state->rotation == ROTATION_ANGLE_90 || @@ -534,6 +523,11 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct view *recout_skip struct rect surf_src = plane_state->src_rect; struct rect surf_clip = plane_state->clip_rect; int recout_full_x, recout_full_y; + bool pri_split = pipe_ctx->bottom_pipe && + pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state; + bool sec_split = pipe_ctx->top_pipe && + pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state; + bool top_bottom_split = stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM; if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 || pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) @@ -568,33 +562,43 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct view *recout_skip - pipe_ctx->plane_res.scl_data.recout.y; /* Handle h & vsplit */ - if (pipe_ctx->top_pipe && pipe_ctx->top_pipe->plane_state == - pipe_ctx->plane_state) { - if (stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) { - pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height / 2; - /* Floor primary pipe, ceil 2ndary pipe */ - pipe_ctx->plane_res.scl_data.recout.height = (pipe_ctx->plane_res.scl_data.recout.height + 1) / 2; + if (sec_split && top_bottom_split) { + pipe_ctx->plane_res.scl_data.recout.y += + pipe_ctx->plane_res.scl_data.recout.height / 2; + /* Floor primary pipe, ceil 2ndary pipe */ + pipe_ctx->plane_res.scl_data.recout.height = + (pipe_ctx->plane_res.scl_data.recout.height + 1) / 2; + } else if (pri_split && top_bottom_split) + pipe_ctx->plane_res.scl_data.recout.height /= 2; + else if (pri_split || sec_split) { + /* HMirror XOR Secondary_pipe XOR Rotation_180 */ + bool right_view = (sec_split != plane_state->horizontal_mirror) != + (plane_state->rotation == ROTATION_ANGLE_180); + + if (plane_state->rotation == ROTATION_ANGLE_90 + || plane_state->rotation == ROTATION_ANGLE_270) + /* Secondary_pipe XOR Rotation_270 */ + right_view = (plane_state->rotation == ROTATION_ANGLE_270) != sec_split; + + if (right_view) { + pipe_ctx->plane_res.scl_data.recout.x += + pipe_ctx->plane_res.scl_data.recout.width / 2; + /* Ceil offset pipe */ + pipe_ctx->plane_res.scl_data.recout.width = + (pipe_ctx->plane_res.scl_data.recout.width + 1) / 2; } else { - pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width / 2; - pipe_ctx->plane_res.scl_data.recout.width = (pipe_ctx->plane_res.scl_data.recout.width + 1) / 2; - } - } else if (pipe_ctx->bottom_pipe && - pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state) { - if (stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) - pipe_ctx->plane_res.scl_data.recout.height /= 2; - else pipe_ctx->plane_res.scl_data.recout.width /= 2; + } } - /* Unclipped recout offset = stream dst offset + ((surf dst offset - stream surf_src offset) * * 1/ stream scaling ratio) - (surf surf_src offset * 1/ full scl * ratio) */ - recout_full_x = stream->dst.x + (plane_state->dst_rect.x - stream->src.x) + recout_full_x = stream->dst.x + (plane_state->dst_rect.x - stream->src.x) * stream->dst.width / stream->src.width - surf_src.x * plane_state->dst_rect.width / surf_src.width * stream->dst.width / stream->src.width; - recout_full_y = stream->dst.y + (plane_state->dst_rect.y - stream->src.y) + recout_full_y = stream->dst.y + (plane_state->dst_rect.y - stream->src.y) * stream->dst.height / stream->src.height - surf_src.y * plane_state->dst_rect.height / surf_src.height * stream->dst.height / stream->src.height; @@ -650,7 +654,20 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r struct rect src = pipe_ctx->plane_state->src_rect; int vpc_div = (data->format == PIXEL_FORMAT_420BPP8 || data->format == PIXEL_FORMAT_420BPP10) ? 2 : 1; + bool flip_vert_scan_dir = false, flip_horz_scan_dir = false; + /* + * Need to calculate the scan direction for viewport to make adjustments + */ + if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_180) { + flip_vert_scan_dir = true; + flip_horz_scan_dir = true; + } else if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90) + flip_vert_scan_dir = true; + else if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) + flip_horz_scan_dir = true; + if (pipe_ctx->plane_state->horizontal_mirror) + flip_horz_scan_dir = !flip_horz_scan_dir; if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 || pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) { @@ -715,7 +732,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r } /* Adjust for non-0 viewport offset */ - if (data->viewport.x) { + if (data->viewport.x && !flip_horz_scan_dir) { int int_part; data->inits.h = dal_fixed31_32_add(data->inits.h, dal_fixed31_32_mul_int( @@ -736,7 +753,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r data->inits.h = dal_fixed31_32_add_int(data->inits.h, int_part); } - if (data->viewport_c.x) { + if (data->viewport_c.x && !flip_horz_scan_dir) { int int_part; data->inits.h_c = dal_fixed31_32_add(data->inits.h_c, dal_fixed31_32_mul_int( @@ -757,7 +774,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r data->inits.h_c = dal_fixed31_32_add_int(data->inits.h_c, int_part); } - if (data->viewport.y) { + if (data->viewport.y && !flip_vert_scan_dir) { int int_part; data->inits.v = dal_fixed31_32_add(data->inits.v, dal_fixed31_32_mul_int( @@ -778,7 +795,7 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r data->inits.v = dal_fixed31_32_add_int(data->inits.v, int_part); } - if (data->viewport_c.y) { + if (data->viewport_c.y && !flip_vert_scan_dir) { int int_part; data->inits.v_c = dal_fixed31_32_add(data->inits.v_c, dal_fixed31_32_mul_int( diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 375fb457e22353c0714464aa1142dc867879d190..261811e0c094a81a0a1c0c1c6420f83d4db4a3bc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -226,7 +226,7 @@ bool dc_stream_set_cursor_attributes( if (pipe_ctx->plane_res.dpp != NULL && pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes != NULL) pipe_ctx->plane_res.dpp->funcs->set_cursor_attributes( - pipe_ctx->plane_res.dpp, attributes); + pipe_ctx->plane_res.dpp, attributes->color_format); } stream->cursor_attributes = *attributes; @@ -301,6 +301,8 @@ bool dc_stream_set_cursor_position( } + stream->cursor_position = *position; + return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index c99ed85ba9a263ffda28ca0a242ada1bffc2edeb..e2e3c9df79ea0f976dbbc6f6bf889169ccce0484 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -38,7 +38,7 @@ #include "inc/compressor.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.1.20" +#define DC_VER "3.1.27" #define MAX_SURFACES 3 #define MAX_STREAMS 6 @@ -250,6 +250,8 @@ struct dc { */ struct dm_pp_display_configuration prev_display_config; + bool optimized_required; + /* FBC compressor */ #if defined(CONFIG_DRM_AMD_DC_FBC) struct compressor *fbc_compressor; @@ -340,7 +342,7 @@ struct dc_hdr_static_metadata { enum dc_transfer_func_type { TF_TYPE_PREDEFINED, TF_TYPE_DISTRIBUTED_POINTS, - TF_TYPE_BYPASS + TF_TYPE_BYPASS, }; struct dc_transfer_func_distributed_points { @@ -359,6 +361,7 @@ enum dc_transfer_func_predefined { TRANSFER_FUNCTION_BT709, TRANSFER_FUNCTION_PQ, TRANSFER_FUNCTION_LINEAR, + TRANSFER_FUNCTION_UNITY, }; struct dc_transfer_func { @@ -385,6 +388,7 @@ union surface_update_flags { struct { /* Medium updates */ + uint32_t dcc_change:1; uint32_t color_space_change:1; uint32_t input_tf_change:1; uint32_t horizontal_mirror_change:1; @@ -436,6 +440,7 @@ struct dc_plane_state { enum dc_rotation_angle rotation; enum plane_stereo_format stereo_format; + bool is_tiling_rotated; bool per_pixel_alpha; bool visible; bool flip_immediate; diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index c584252669fde5a73813aee0b5a79ab51aeb9c76..48e1fcf53d43499d56980f36c35dd87ac8fb521d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -1,3 +1,25 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ /* * dc_helper.c * diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 587c0bb3d4ac5193b6dd0f21e6005f6320b98425..03029f72dc3f31f902438edeafabf73f502a57c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -579,8 +579,6 @@ enum dc_timing_standard { TIMING_STANDARD_MAX }; - - enum dc_color_depth { COLOR_DEPTH_UNDEFINED, COLOR_DEPTH_666, diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index fed0e5ea96256fc74f86c84b0e78904dc02b5f1d..01c60f11b2bdeec208f5df9e5cfd2092a614eac5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -86,6 +86,7 @@ struct dc_stream_state { struct dc_stream_status status; struct dc_cursor_attributes cursor_attributes; + struct dc_cursor_position cursor_position; /* from stream struct */ struct kref refcount; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 9291a60126ad3926df576288bb93cfbb27c0dcfd..9faddfae241dd2d382b7b9b0027264624b9e5e3b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -218,6 +218,7 @@ struct dc_edid_caps { bool lte_340mcsc_scramble; bool edid_hdmi; + bool hdr_supported; }; struct view { diff --git a/drivers/gpu/drm/amd/display/dc/dce/Makefile b/drivers/gpu/drm/amd/display/dc/dce/Makefile index 8abec0bed379c15c4c3fb52f620af53ee4af6fb7..11401fd8e5356c8e4866f6660df34ff2da1e155b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for common 'dce' logic # HW object file under this folder follow similar pattern for HW programming # - register offset and/or shift + mask stored in the dec_hw struct diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index 3fe8e697483fff2c9b33d7c5e3f948c7cc68ac38..b48190f549079bb113dfc7d370fa8c7ff0c59439 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -385,21 +385,12 @@ static bool dce_abm_init_backlight(struct abm *abm) return true; } -static bool is_dmcu_initialized(struct abm *abm) -{ - struct dce_abm *abm_dce = TO_DCE_ABM(abm); - unsigned int dmcu_uc_reset; - - REG_GET(DMCU_STATUS, UC_IN_RESET, &dmcu_uc_reset); - - return !dmcu_uc_reset; -} - static bool dce_abm_set_backlight_level( struct abm *abm, unsigned int backlight_level, unsigned int frame_ramp, - unsigned int controller_id) + unsigned int controller_id, + bool use_smooth_brightness) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); @@ -408,7 +399,7 @@ static bool dce_abm_set_backlight_level( backlight_level, backlight_level); /* If DMCU is in reset state, DMCU is uninitialized */ - if (is_dmcu_initialized(abm)) + if (use_smooth_brightness) dmcu_set_backlight_level(abm_dce, backlight_level, frame_ramp, @@ -425,8 +416,7 @@ static const struct abm_funcs dce_funcs = { .init_backlight = dce_abm_init_backlight, .set_backlight_level = dce_abm_set_backlight_level, .get_current_backlight_8_bit = dce_abm_get_current_backlight_8_bit, - .set_abm_immediate_disable = dce_abm_immediate_disable, - .is_dmcu_initialized = is_dmcu_initialized + .set_abm_immediate_disable = dce_abm_immediate_disable }; static void dce_abm_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h index 59e909ec88f22778d9917fb754875a9e79da0c59..ff9436966041098ed84e586240cf779854301dbe 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.h @@ -37,8 +37,7 @@ SR(LVTMA_PWRSEQ_REF_DIV), \ SR(MASTER_COMM_CNTL_REG), \ SR(MASTER_COMM_CMD_REG), \ - SR(MASTER_COMM_DATA_REG1), \ - SR(DMCU_STATUS) + SR(MASTER_COMM_DATA_REG1) #define ABM_DCE110_COMMON_REG_LIST() \ ABM_COMMON_REG_LIST_DCE_BASE(), \ @@ -84,8 +83,7 @@ ABM_SF(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, mask_sh), \ ABM_SF(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0, mask_sh), \ ABM_SF(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE1, mask_sh), \ - ABM_SF(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE2, mask_sh), \ - ABM_SF(DMCU_STATUS, UC_IN_RESET, mask_sh) + ABM_SF(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE2, mask_sh) #define ABM_MASK_SH_LIST_DCE110(mask_sh) \ ABM_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(mask_sh), \ @@ -174,7 +172,6 @@ type MASTER_COMM_CMD_REG_BYTE2; \ type BL_PWM_REF_DIV; \ type BL_PWM_EN; \ - type UC_IN_RESET; \ type BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN; \ type BL_PWM_GRP1_REG_LOCK; \ type BL_PWM_GRP1_REG_UPDATE_PENDING @@ -206,7 +203,6 @@ struct dce_abm_registers { uint32_t MASTER_COMM_CMD_REG; uint32_t MASTER_COMM_DATA_REG1; uint32_t BIOS_SCRATCH_2; - uint32_t DMCU_STATUS; uint32_t BL_PWM_GRP1_REG_LOCK; }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c index 9031d22285eab98578db34ea956f73bb6f6b300e..9e98a5f39a6dbbec76a9bb296f6327b20ef34fd0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c @@ -29,7 +29,6 @@ #include "fixed32_32.h" #include "bios_parser_interface.h" #include "dc.h" -#include "dce_abm.h" #include "dmcu.h" #if defined(CONFIG_DRM_AMD_DC_DCN1_0) #include "dcn_calcs.h" @@ -384,7 +383,6 @@ static int dce112_set_clock( struct bp_set_dce_clock_parameters dce_clk_params; struct dc_bios *bp = clk->ctx->dc_bios; struct dc *core_dc = clk->ctx->dc; - struct abm *abm = core_dc->res_pool->abm; struct dmcu *dmcu = core_dc->res_pool->dmcu; int actual_clock = requested_clk_khz; /* Prepare to program display clock*/ @@ -417,7 +415,7 @@ static int dce112_set_clock( bp->funcs->set_dce_clock(bp, &dce_clk_params); - if (abm->funcs->is_dmcu_initialized(abm) && clk_dce->dfs_bypass_disp_clk != actual_clock) + if (clk_dce->dfs_bypass_disp_clk != actual_clock) dmcu->funcs->set_psr_wait_loop(dmcu, actual_clock / 1000 / 7); clk_dce->dfs_bypass_disp_clk = actual_clock; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c index a6de99db0444deb729c9da06ca212b9d70109204..f663adb335849746af6c420d69cb424684c30bb5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c @@ -263,15 +263,35 @@ static void dce_dmcu_setup_psr(struct dmcu *dmcu, REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1); } +static bool dce_is_dmcu_initialized(struct dmcu *dmcu) +{ + struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu); + unsigned int dmcu_uc_reset; + + /* microcontroller is not running */ + REG_GET(DMCU_STATUS, UC_IN_RESET, &dmcu_uc_reset); + + /* DMCU is not running */ + if (dmcu_uc_reset) + return false; + + return true; +} + static void dce_psr_wait_loop( struct dmcu *dmcu, unsigned int wait_loop_number) { struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu); union dce_dmcu_psr_config_data_wait_loop_reg1 masterCmdData1; + if (dmcu->cached_wait_loop_number == wait_loop_number) return; + /* DMCU is not running */ + if (!dce_is_dmcu_initialized(dmcu)) + return; + /* waitDMCUReadyForCmd */ REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 1, 10000); @@ -691,6 +711,14 @@ static void dcn10_get_psr_wait_loop( return; } +static bool dcn10_is_dmcu_initialized(struct dmcu *dmcu) +{ + /* microcontroller is not running */ + if (dmcu->dmcu_state != DMCU_RUNNING) + return false; + return true; +} + #endif static const struct dmcu_funcs dce_funcs = { @@ -700,7 +728,8 @@ static const struct dmcu_funcs dce_funcs = { .setup_psr = dce_dmcu_setup_psr, .get_psr_state = dce_get_dmcu_psr_state, .set_psr_wait_loop = dce_psr_wait_loop, - .get_psr_wait_loop = dce_get_psr_wait_loop + .get_psr_wait_loop = dce_get_psr_wait_loop, + .is_dmcu_initialized = dce_is_dmcu_initialized }; #if defined(CONFIG_DRM_AMD_DC_DCN1_0) @@ -711,7 +740,8 @@ static const struct dmcu_funcs dcn10_funcs = { .setup_psr = dcn10_dmcu_setup_psr, .get_psr_state = dcn10_get_dmcu_psr_state, .set_psr_wait_loop = dcn10_psr_wait_loop, - .get_psr_wait_loop = dcn10_get_psr_wait_loop + .get_psr_wait_loop = dcn10_get_psr_wait_loop, + .is_dmcu_initialized = dcn10_is_dmcu_initialized }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h index 4c25e2dd28f8ee4a6bebcca0e272992ea26a181c..1d4546f2313506a23449fccb137a834526bf0d23 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h @@ -62,6 +62,8 @@ DMCU_ENABLE, mask_sh), \ DMCU_SF(DMCU_STATUS, \ UC_IN_STOP_MODE, mask_sh), \ + DMCU_SF(DMCU_STATUS, \ + UC_IN_RESET, mask_sh), \ DMCU_SF(DMCU_RAM_ACCESS_CTRL, \ IRAM_HOST_ACCESS_EN, mask_sh), \ DMCU_SF(DMCU_RAM_ACCESS_CTRL, \ @@ -98,6 +100,7 @@ type IRAM_RD_ADDR_AUTO_INC; \ type DMCU_ENABLE; \ type UC_IN_STOP_MODE; \ + type UC_IN_RESET; \ type MASTER_COMM_CMD_REG_BYTE0; \ type MASTER_COMM_INTERRUPT; \ type DPHY_RX_FAST_TRAINING_CAPABLE; \ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h index 3b0db253ac22f7b0afc841ad00fa7c829f477cf5..b73db9e784375618f87422f143cf36d00e1ce552 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h @@ -582,7 +582,8 @@ struct dce_hwseq_registers { type DOMAIN7_PGFSM_PWR_STATUS; \ type DCFCLK_GATE_DIS; \ type DCHUBBUB_GLOBAL_TIMER_REFDIV; \ - type DENTIST_DPPCLK_WDIVIDER; + type DENTIST_DPPCLK_WDIVIDER; \ + type DENTIST_DISPCLK_WDIVIDER; struct dce_hwseq_shift { HWSEQ_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index bad70c6b3aad5214ad07d0c8a3fc4378c6bb15dc..a266e3f5e75fd7ae82a46f7d3e09f900660c72cc 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -1072,21 +1072,6 @@ void dce110_link_encoder_disable_output( /* disable encoder */ if (dc_is_dp_signal(signal)) link_encoder_disable(enc110); - - /* - * TODO: Power control cause regression, we should implement - * it properly, for now just comment it. - */ -// if (enc110->base.connector.id == CONNECTOR_ID_EDP) { -// /* power down eDP panel */ -// link_encoder_edp_wait_for_hpd_ready( -// enc, -// enc->connector, -// false); -// -// link_encoder_edp_power_control( -// enc, false); -// } } void dce110_link_encoder_dp_set_lane_settings( diff --git a/drivers/gpu/drm/amd/display/dc/dce100/Makefile b/drivers/gpu/drm/amd/display/dc/dce100/Makefile index ea40870624b3884b41b2369987ecc850919f4f29..a822d4e2a1693881f090d776f6fbc11ab68d5f24 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce100/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 90911258bdb3ac92a490846199dfd85ba701ca1a..3ea43e2a9450ce562cd01b8a5c18a9ee060d2a62 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -1,5 +1,5 @@ /* -* Copyright 2012-15 Advanced Micro Devices, Inc. + * Copyright 2012-15 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h index de8fdf438f9b34125d7179aa6af941f68f18453a..2f366d66635d8f11ed566e025a69fafa6df96711 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h @@ -1,3 +1,26 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ /* * dce100_resource.h * diff --git a/drivers/gpu/drm/amd/display/dc/dce110/Makefile b/drivers/gpu/drm/amd/display/dc/dce110/Makefile index 98d956e2f218f7d2e4142f586513276b48f5a108..d564c0eb8b045393ae235c6169b6c85b6af94693 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce110/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index e650bdcd9423b22e3d07cfb95b4e052059867ad1..86cdd7b4811fb7f1195ce7d8e73db9e8e42c6c3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -354,8 +354,8 @@ static bool convert_to_custom_float(struct pwl_result_data *rgb_resulted, return false; } - if (!convert_to_custom_float_format(arr_points[2].slope, &fmt, - &arr_points[2].custom_float_slope)) { + if (!convert_to_custom_float_format(arr_points[1].slope, &fmt, + &arr_points[1].custom_float_slope)) { BREAK_TO_DEBUGGER(); return false; } @@ -870,8 +870,6 @@ void hwss_edp_power_control( "%s: Skipping Panel Power action: %s\n", __func__, (power_up ? "On":"Off")); } - - hwss_edp_wait_for_hpd_ready(link, true); } /*todo: cloned in stream enc, fix*/ @@ -972,11 +970,9 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) } /* blank at encoder level */ - if (dc_is_dp_signal(pipe_ctx->stream->signal)) { - if (pipe_ctx->stream->sink->link->connector_signal == SIGNAL_TYPE_EDP) - hwss_edp_backlight_control(link, false); + if (dc_is_dp_signal(pipe_ctx->stream->signal)) pipe_ctx->stream_res.stream_enc->funcs->dp_blank(pipe_ctx->stream_res.stream_enc); - } + link->link_enc->funcs->connect_dig_be_to_fe( link->link_enc, pipe_ctx->stream_res.stream_enc->id, @@ -988,15 +984,12 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings) { struct encoder_unblank_param params = { { 0 } }; - struct dc_link *link = pipe_ctx->stream->sink->link; /* only 3 items below are used by unblank */ params.pixel_clk_khz = pipe_ctx->stream->timing.pix_clk_khz; params.link_settings.link_rate = link_settings->link_rate; pipe_ctx->stream_res.stream_enc->funcs->dp_unblank(pipe_ctx->stream_res.stream_enc, ¶ms); - if (link->connector_signal == SIGNAL_TYPE_EDP) - hwss_edp_backlight_control(link, true); } @@ -1342,10 +1335,8 @@ static void power_down_encoders(struct dc *dc) if (!dc->links[i]->wa_flags.dp_keep_receiver_powered) dp_receiver_power_ctrl(dc->links[i], false); - if (connector_id == CONNECTOR_ID_EDP) { + if (connector_id == CONNECTOR_ID_EDP) signal = SIGNAL_TYPE_EDP; - hwss_edp_backlight_control(dc->links[i], false); - } } dc->links[i]->link_enc->funcs->disable_output( @@ -1698,60 +1689,54 @@ static void apply_min_clocks( /* * Check if FBC can be enabled */ -static enum dc_status validate_fbc(struct dc *dc, - struct dc_state *context) +static bool should_enable_fbc(struct dc *dc, + struct dc_state *context) { - struct pipe_ctx *pipe_ctx = - &context->res_ctx.pipe_ctx[0]; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[0]; ASSERT(dc->fbc_compressor); /* FBC memory should be allocated */ if (!dc->ctx->fbc_gpu_addr) - return DC_ERROR_UNEXPECTED; + return false; /* Only supports single display */ if (context->stream_count != 1) - return DC_ERROR_UNEXPECTED; + return false; /* Only supports eDP */ if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP) - return DC_ERROR_UNEXPECTED; + return false; /* PSR should not be enabled */ if (pipe_ctx->stream->sink->link->psr_enabled) - return DC_ERROR_UNEXPECTED; + return false; /* Nothing to compress */ if (!pipe_ctx->plane_state) - return DC_ERROR_UNEXPECTED; + return false; /* Only for non-linear tiling */ if (pipe_ctx->plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL) - return DC_ERROR_UNEXPECTED; + return false; - return DC_OK; + return true; } /* * Enable FBC */ -static enum dc_status enable_fbc(struct dc *dc, - struct dc_state *context) +static void enable_fbc(struct dc *dc, + struct dc_state *context) { - enum dc_status status = validate_fbc(dc, context); - - if (status == DC_OK) { + if (should_enable_fbc(dc, context)) { /* Program GRPH COMPRESSED ADDRESS and PITCH */ struct compr_addr_and_pitch_params params = {0, 0, 0}; struct compressor *compr = dc->fbc_compressor; - struct pipe_ctx *pipe_ctx = - &context->res_ctx.pipe_ctx[0]; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[0]; - params.source_view_width = - pipe_ctx->stream->timing.h_addressable; - params.source_view_height = - pipe_ctx->stream->timing.v_addressable; + params.source_view_width = pipe_ctx->stream->timing.h_addressable; + params.source_view_height = pipe_ctx->stream->timing.v_addressable; compr->compr_surface_address.quad_part = dc->ctx->fbc_gpu_addr; @@ -1760,7 +1745,6 @@ static enum dc_status enable_fbc(struct dc *dc, compr->funcs->enable_fbc(compr, ¶ms); } - return status; } #endif @@ -2026,8 +2010,7 @@ enum dc_status dce110_apply_ctx_to_hw( if (pipe_ctx->stream == pipe_ctx_old->stream) continue; - if (pipe_ctx->stream && pipe_ctx_old->stream - && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) + if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) continue; if (pipe_ctx->top_pipe) @@ -2063,9 +2046,6 @@ enum dc_status dce110_apply_ctx_to_hw( context, dc); - if (dc->hwss.enable_plane) - dc->hwss.enable_plane(dc, pipe_ctx, context); - if (DC_OK != status) return status; } @@ -2095,16 +2075,8 @@ static void set_default_colors(struct pipe_ctx *pipe_ctx) struct default_adjustment default_adjust = { 0 }; default_adjust.force_hw_default = false; - if (pipe_ctx->plane_state == NULL) - default_adjust.in_color_space = COLOR_SPACE_SRGB; - else - default_adjust.in_color_space = - pipe_ctx->plane_state->color_space; - if (pipe_ctx->stream == NULL) - default_adjust.out_color_space = COLOR_SPACE_SRGB; - else - default_adjust.out_color_space = - pipe_ctx->stream->output_color_space; + default_adjust.in_color_space = pipe_ctx->plane_state->color_space; + default_adjust.out_color_space = pipe_ctx->stream->output_color_space; default_adjust.csc_adjust_type = GRAPHICS_CSC_ADJUST_TYPE_SW; default_adjust.surface_pixel_format = pipe_ctx->plane_res.scl_data.format; @@ -2872,13 +2844,12 @@ static void dce110_apply_ctx_for_surface( continue; /* Need to allocate mem before program front end for Fiji */ - if (pipe_ctx->plane_res.mi != NULL) - pipe_ctx->plane_res.mi->funcs->allocate_mem_input( - pipe_ctx->plane_res.mi, - pipe_ctx->stream->timing.h_total, - pipe_ctx->stream->timing.v_total, - pipe_ctx->stream->timing.pix_clk_khz, - context->stream_count); + pipe_ctx->plane_res.mi->funcs->allocate_mem_input( + pipe_ctx->plane_res.mi, + pipe_ctx->stream->timing.h_total, + pipe_ctx->stream->timing.v_total, + pipe_ctx->stream->timing.pix_clk_khz, + context->stream_count); dce110_program_front_end_for_pipe(dc, pipe_ctx); @@ -2985,6 +2956,7 @@ static const struct hw_sequencer_funcs dce110_funcs = { .pplib_apply_display_requirements = pplib_apply_display_requirements, .edp_backlight_control = hwss_edp_backlight_control, .edp_power_control = hwss_edp_power_control, + .edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready, }; void dce110_hw_sequencer_construct(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h index 2dd6ac63757292365ebd69e65591c241b0440ba4..fc637647f64331a568d3c62f1a3ed7e73d07afc2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h @@ -77,5 +77,9 @@ void hwss_edp_backlight_control( struct dc_link *link, bool enable); +void hwss_edp_wait_for_hpd_ready( + struct dc_link *link, + bool power_up); + #endif /* __DC_HWSS_DCE110_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 5228ee78f7e6dc38384cb96f0bc733531c5cce68..7c4779578fb76528caef31a1a531f73214c86059 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -1,5 +1,5 @@ /* -* Copyright 2012-15 Advanced Micro Devices, Inc. + * Copyright 2012-15 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c index 07d9303d54772581a42fc1888b3267c0f44e728d..59b4cd3297155129096e7a16c4edd4ece401fd5a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c @@ -1,3 +1,26 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + #include "dm_services.h" /* include DCE11 register header files */ diff --git a/drivers/gpu/drm/amd/display/dc/dce112/Makefile b/drivers/gpu/drm/amd/display/dc/dce112/Makefile index 265ac4310d855cd8ad8493ec7cb6d7de7bdd750f..8e090446d511937361db77ed56471a44d6ed897b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce112/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. diff --git a/drivers/gpu/drm/amd/display/dc/dce120/Makefile b/drivers/gpu/drm/amd/display/dc/dce120/Makefile index 1779b963525cda70d14c855a0e0acd0eb336cd63..37db1f8d45ea547862b07b2503c4a55d6485c8ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce120/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. @@ -8,4 +29,4 @@ dce120_hw_sequencer.o AMD_DAL_DCE120 = $(addprefix $(AMDDALPATH)/dc/dce120/,$(DCE120)) -AMD_DISPLAY_FILES += $(AMD_DAL_DCE120) \ No newline at end of file +AMD_DISPLAY_FILES += $(AMD_DAL_DCE120) diff --git a/drivers/gpu/drm/amd/display/dc/dce80/Makefile b/drivers/gpu/drm/amd/display/dc/dce80/Makefile index c1105895e5facd2cf3619e5432ba620123b926d5..bc388aa4b2f50cdd5c376b1f1833493dda24f550 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce80/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'controller' sub-component of DAL. # It provides the control and status of HW CRTC block. diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index a6ca1f97f7486cd51f7084dc1767be67409b2d5d..5469bdfe19f35125842b50bed2e169ab71df822d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -1,8 +1,29 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for DCN. DCN10 = dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \ - dcn10_dpp.o dcn10_opp.o dcn10_timing_generator.o \ + dcn10_dpp.o dcn10_opp.o dcn10_optc.o \ dcn10_hubp.o dcn10_mpc.o \ dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \ dcn10_hubbub.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 7f579cb19f4bb08ad2af104c9b693e1d5e250e78..53ba3600ee6ac9622ef5c54201b3dbc17df4de04 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -22,11 +22,12 @@ * Authors: AMD * */ - +#include "dc.h" #include "reg_helper.h" #include "dcn10_dpp.h" #include "dcn10_cm_common.h" +#include "custom_float.h" #define REG(reg) reg @@ -121,3 +122,294 @@ void cm_helper_program_xfer_func( } } + + + +bool cm_helper_convert_to_custom_float( + struct pwl_result_data *rgb_resulted, + struct curve_points *arr_points, + uint32_t hw_points_num, + bool fixpoint) +{ + struct custom_float_format fmt; + + struct pwl_result_data *rgb = rgb_resulted; + + uint32_t i = 0; + + fmt.exponenta_bits = 6; + fmt.mantissa_bits = 12; + fmt.sign = false; + + if (!convert_to_custom_float_format(arr_points[0].x, &fmt, + &arr_points[0].custom_float_x)) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (!convert_to_custom_float_format(arr_points[0].offset, &fmt, + &arr_points[0].custom_float_offset)) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (!convert_to_custom_float_format(arr_points[0].slope, &fmt, + &arr_points[0].custom_float_slope)) { + BREAK_TO_DEBUGGER(); + return false; + } + + fmt.mantissa_bits = 10; + fmt.sign = false; + + if (!convert_to_custom_float_format(arr_points[1].x, &fmt, + &arr_points[1].custom_float_x)) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (fixpoint == true) + arr_points[1].custom_float_y = dal_fixed31_32_clamp_u0d14(arr_points[1].y); + else if (!convert_to_custom_float_format(arr_points[1].y, &fmt, + &arr_points[1].custom_float_y)) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (!convert_to_custom_float_format(arr_points[1].slope, &fmt, + &arr_points[1].custom_float_slope)) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (hw_points_num == 0 || rgb_resulted == NULL || fixpoint == true) + return true; + + fmt.mantissa_bits = 12; + fmt.sign = true; + + while (i != hw_points_num) { + if (!convert_to_custom_float_format(rgb->red, &fmt, + &rgb->red_reg)) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (!convert_to_custom_float_format(rgb->green, &fmt, + &rgb->green_reg)) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (!convert_to_custom_float_format(rgb->blue, &fmt, + &rgb->blue_reg)) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (!convert_to_custom_float_format(rgb->delta_red, &fmt, + &rgb->delta_red_reg)) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (!convert_to_custom_float_format(rgb->delta_green, &fmt, + &rgb->delta_green_reg)) { + BREAK_TO_DEBUGGER(); + return false; + } + + if (!convert_to_custom_float_format(rgb->delta_blue, &fmt, + &rgb->delta_blue_reg)) { + BREAK_TO_DEBUGGER(); + return false; + } + + ++rgb; + ++i; + } + + return true; +} + + +#define MAX_REGIONS_NUMBER 34 +#define MAX_LOW_POINT 25 +#define NUMBER_SEGMENTS 32 + +bool cm_helper_translate_curve_to_hw_format( + const struct dc_transfer_func *output_tf, + struct pwl_params *lut_params, bool fixpoint) +{ + struct curve_points *arr_points; + struct pwl_result_data *rgb_resulted; + struct pwl_result_data *rgb; + struct pwl_result_data *rgb_plus_1; + struct fixed31_32 y_r; + struct fixed31_32 y_g; + struct fixed31_32 y_b; + struct fixed31_32 y1_min; + struct fixed31_32 y3_max; + + int32_t segment_start, segment_end; + int32_t i; + uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points; + + if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) + return false; + + PERF_TRACE(); + + arr_points = lut_params->arr_points; + rgb_resulted = lut_params->rgb_resulted; + hw_points = 0; + + memset(lut_params, 0, sizeof(struct pwl_params)); + memset(seg_distr, 0, sizeof(seg_distr)); + + if (output_tf->tf == TRANSFER_FUNCTION_PQ) { + /* 32 segments + * segments are from 2^-25 to 2^7 + */ + for (i = 0; i < 32 ; i++) + seg_distr[i] = 3; + + segment_start = -25; + segment_end = 7; + } else { + /* 10 segments + * segment is from 2^-10 to 2^0 + * There are less than 256 points, for optimization + */ + seg_distr[0] = 3; + seg_distr[1] = 4; + seg_distr[2] = 4; + seg_distr[3] = 4; + seg_distr[4] = 4; + seg_distr[5] = 4; + seg_distr[6] = 4; + seg_distr[7] = 4; + seg_distr[8] = 5; + seg_distr[9] = 5; + + segment_start = -10; + segment_end = 0; + } + + for (i = segment_end - segment_start; i < MAX_REGIONS_NUMBER ; i++) + seg_distr[i] = -1; + + for (k = 0; k < MAX_REGIONS_NUMBER; k++) { + if (seg_distr[k] != -1) + hw_points += (1 << seg_distr[k]); + } + + j = 0; + for (k = 0; k < (segment_end - segment_start); k++) { + increment = NUMBER_SEGMENTS / (1 << seg_distr[k]); + start_index = (segment_start + k + MAX_LOW_POINT) * NUMBER_SEGMENTS; + for (i = start_index; i < start_index + NUMBER_SEGMENTS; i += increment) { + if (j == hw_points - 1) + break; + rgb_resulted[j].red = output_tf->tf_pts.red[i]; + rgb_resulted[j].green = output_tf->tf_pts.green[i]; + rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; + j++; + } + } + + /* last point */ + start_index = (segment_end + MAX_LOW_POINT) * NUMBER_SEGMENTS; + rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index]; + rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; + rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; + + arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), + dal_fixed31_32_from_int(segment_start)); + arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), + dal_fixed31_32_from_int(segment_end)); + + y_r = rgb_resulted[0].red; + y_g = rgb_resulted[0].green; + y_b = rgb_resulted[0].blue; + + y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b)); + + arr_points[0].y = y1_min; + arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y, arr_points[0].x); + y_r = rgb_resulted[hw_points - 1].red; + y_g = rgb_resulted[hw_points - 1].green; + y_b = rgb_resulted[hw_points - 1].blue; + + /* see comment above, m_arrPoints[1].y should be the Y value for the + * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) + */ + y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b)); + + arr_points[1].y = y3_max; + + arr_points[1].slope = dal_fixed31_32_zero; + + if (output_tf->tf == TRANSFER_FUNCTION_PQ) { + /* for PQ, we want to have a straight line from last HW X point, + * and the slope to be such that we hit 1.0 at 10000 nits. + */ + const struct fixed31_32 end_value = + dal_fixed31_32_from_int(125); + + arr_points[1].slope = dal_fixed31_32_div( + dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y), + dal_fixed31_32_sub(end_value, arr_points[1].x)); + } + + lut_params->hw_points_num = hw_points; + + i = 1; + for (k = 0; k < MAX_REGIONS_NUMBER && i < MAX_REGIONS_NUMBER; k++) { + if (seg_distr[k] != -1) { + lut_params->arr_curve_points[k].segments_num = + seg_distr[k]; + lut_params->arr_curve_points[i].offset = + lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]); + } + i++; + } + + if (seg_distr[k] != -1) + lut_params->arr_curve_points[k].segments_num = seg_distr[k]; + + rgb = rgb_resulted; + rgb_plus_1 = rgb_resulted + 1; + + i = 1; + while (i != hw_points + 1) { + if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red)) + rgb_plus_1->red = rgb->red; + if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green)) + rgb_plus_1->green = rgb->green; + if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue)) + rgb_plus_1->blue = rgb->blue; + + rgb->delta_red = dal_fixed31_32_sub(rgb_plus_1->red, rgb->red); + rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green); + rgb->delta_blue = dal_fixed31_32_sub(rgb_plus_1->blue, rgb->blue); + + if (fixpoint == true) { + rgb->delta_red_reg = dal_fixed31_32_clamp_u0d10(rgb->delta_red); + rgb->delta_green_reg = dal_fixed31_32_clamp_u0d10(rgb->delta_green); + rgb->delta_blue_reg = dal_fixed31_32_clamp_u0d10(rgb->delta_blue); + rgb->red_reg = dal_fixed31_32_clamp_u0d14(rgb->red); + rgb->green_reg = dal_fixed31_32_clamp_u0d14(rgb->green); + rgb->blue_reg = dal_fixed31_32_clamp_u0d14(rgb->blue); + } + + ++rgb_plus_1; + ++rgb; + ++i; + } + cm_helper_convert_to_custom_float(rgb_resulted, + lut_params->arr_points, + hw_points, fixpoint); + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h index 64836dcf21f2296da5d673e400ca4267b905a813..64e476b83bcba88528c81151bb935725069bc561 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h @@ -96,4 +96,14 @@ void cm_helper_program_xfer_func( const struct pwl_params *params, const struct xfer_func_reg *reg); +bool cm_helper_convert_to_custom_float( + struct pwl_result_data *rgb_resulted, + struct curve_points *arr_points, + uint32_t hw_points_num, + bool fixpoint); + +bool cm_helper_translate_curve_to_hw_format( + const struct dc_transfer_func *output_tf, + struct pwl_params *lut_params, bool fixpoint); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index 8df3945370cf1d8fe052aee667862658df4db6f4..f2a08b156cf0018f2de676fb65d9c0d0ce837db3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -159,11 +159,10 @@ bool dpp_get_optimal_number_of_taps( scl_data->taps.h_taps = 1; if (IDENTITY_RATIO(scl_data->ratios.vert)) scl_data->taps.v_taps = 1; - /* - * Spreadsheet doesn't handle taps_c is one properly, - * need to force Chroma to always be scaled to pass - * bandwidth validation. - */ + if (IDENTITY_RATIO(scl_data->ratios.horz_c)) + scl_data->taps.h_taps_c = 1; + if (IDENTITY_RATIO(scl_data->ratios.vert_c)) + scl_data->taps.v_taps_c = 1; } return true; @@ -386,10 +385,9 @@ void dpp1_cnv_setup ( void dpp1_set_cursor_attributes( struct dpp *dpp_base, - const struct dc_cursor_attributes *attr) + enum dc_cursor_color_format color_format) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); - enum dc_cursor_color_format color_format = attr->color_format; REG_UPDATE_2(CURSOR0_CONTROL, CUR0_MODE, color_format, @@ -402,13 +400,6 @@ void dpp1_set_cursor_attributes( REG_UPDATE(CURSOR0_COLOR1, CUR0_COLOR1, 0xFFFFFFFF); } - - /* TODO: Fixed vs float */ - - REG_UPDATE_3(FORMAT_CONTROL, - CNVC_BYPASS, 0, - FORMAT_CONTROL__ALPHA_EN, 1, - FORMAT_EXPANSION_MODE, 0); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h index ad71fb50f8a51793c6673c0f3e3a4d4d3a421570..f56ee4d08d893f1fc908804e997e87589a6fef70 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h @@ -730,8 +730,9 @@ type CM_BLNDGAM_RAMA_EXP_REGION33_NUM_SEGMENTS; \ type CM_BLNDGAM_LUT_WRITE_EN_MASK; \ type CM_BLNDGAM_LUT_WRITE_SEL; \ + type CM_BLNDGAM_CONFIG_STATUS; \ type CM_BLNDGAM_LUT_INDEX; \ - type CM_BLNDGAM_LUT_DATA; \ + type BLNDGAM_MEM_PWR_FORCE; \ type CM_3DLUT_MODE; \ type CM_3DLUT_SIZE; \ type CM_3DLUT_INDEX; \ @@ -905,6 +906,7 @@ type CM_SHAPER_RAMA_EXP_REGION33_LUT_OFFSET; \ type CM_SHAPER_RAMA_EXP_REGION33_NUM_SEGMENTS; \ type CM_SHAPER_LUT_WRITE_EN_MASK; \ + type CM_SHAPER_CONFIG_STATUS; \ type CM_SHAPER_LUT_WRITE_SEL; \ type CM_SHAPER_LUT_INDEX; \ type CM_SHAPER_LUT_DATA; \ @@ -1005,258 +1007,255 @@ type CM_BYPASS; \ type FORMAT_CONTROL__ALPHA_EN; \ type CUR0_COLOR0; \ - type CUR0_COLOR1 - - + type CUR0_COLOR1; struct dcn_dpp_shift { - TF_REG_FIELD_LIST(uint8_t); + TF_REG_FIELD_LIST(uint8_t) }; struct dcn_dpp_mask { - TF_REG_FIELD_LIST(uint32_t); + TF_REG_FIELD_LIST(uint32_t) }; - - +#define DPP_COMMON_REG_VARIABLE_LIST \ + uint32_t DSCL_EXT_OVERSCAN_LEFT_RIGHT; \ + uint32_t DSCL_EXT_OVERSCAN_TOP_BOTTOM; \ + uint32_t OTG_H_BLANK; \ + uint32_t OTG_V_BLANK; \ + uint32_t SCL_MODE; \ + uint32_t LB_DATA_FORMAT; \ + uint32_t LB_MEMORY_CTRL; \ + uint32_t DSCL_AUTOCAL; \ + uint32_t SCL_BLACK_OFFSET; \ + uint32_t SCL_TAP_CONTROL; \ + uint32_t SCL_COEF_RAM_TAP_SELECT; \ + uint32_t SCL_COEF_RAM_TAP_DATA; \ + uint32_t DSCL_2TAP_CONTROL; \ + uint32_t MPC_SIZE; \ + uint32_t SCL_HORZ_FILTER_SCALE_RATIO; \ + uint32_t SCL_VERT_FILTER_SCALE_RATIO; \ + uint32_t SCL_HORZ_FILTER_SCALE_RATIO_C; \ + uint32_t SCL_VERT_FILTER_SCALE_RATIO_C; \ + uint32_t SCL_HORZ_FILTER_INIT; \ + uint32_t SCL_HORZ_FILTER_INIT_C; \ + uint32_t SCL_VERT_FILTER_INIT; \ + uint32_t SCL_VERT_FILTER_INIT_BOT; \ + uint32_t SCL_VERT_FILTER_INIT_C; \ + uint32_t SCL_VERT_FILTER_INIT_BOT_C; \ + uint32_t RECOUT_START; \ + uint32_t RECOUT_SIZE; \ + uint32_t CM_GAMUT_REMAP_CONTROL; \ + uint32_t CM_GAMUT_REMAP_C11_C12; \ + uint32_t CM_GAMUT_REMAP_C33_C34; \ + uint32_t CM_COMA_C11_C12; \ + uint32_t CM_COMA_C33_C34; \ + uint32_t CM_COMB_C11_C12; \ + uint32_t CM_COMB_C33_C34; \ + uint32_t CM_OCSC_CONTROL; \ + uint32_t CM_OCSC_C11_C12; \ + uint32_t CM_OCSC_C33_C34; \ + uint32_t CM_MEM_PWR_CTRL; \ + uint32_t CM_RGAM_LUT_DATA; \ + uint32_t CM_RGAM_LUT_WRITE_EN_MASK; \ + uint32_t CM_RGAM_LUT_INDEX; \ + uint32_t CM_RGAM_RAMB_START_CNTL_B; \ + uint32_t CM_RGAM_RAMB_START_CNTL_G; \ + uint32_t CM_RGAM_RAMB_START_CNTL_R; \ + uint32_t CM_RGAM_RAMB_SLOPE_CNTL_B; \ + uint32_t CM_RGAM_RAMB_SLOPE_CNTL_G; \ + uint32_t CM_RGAM_RAMB_SLOPE_CNTL_R; \ + uint32_t CM_RGAM_RAMB_END_CNTL1_B; \ + uint32_t CM_RGAM_RAMB_END_CNTL2_B; \ + uint32_t CM_RGAM_RAMB_END_CNTL1_G; \ + uint32_t CM_RGAM_RAMB_END_CNTL2_G; \ + uint32_t CM_RGAM_RAMB_END_CNTL1_R; \ + uint32_t CM_RGAM_RAMB_END_CNTL2_R; \ + uint32_t CM_RGAM_RAMB_REGION_0_1; \ + uint32_t CM_RGAM_RAMB_REGION_32_33; \ + uint32_t CM_RGAM_RAMA_START_CNTL_B; \ + uint32_t CM_RGAM_RAMA_START_CNTL_G; \ + uint32_t CM_RGAM_RAMA_START_CNTL_R; \ + uint32_t CM_RGAM_RAMA_SLOPE_CNTL_B; \ + uint32_t CM_RGAM_RAMA_SLOPE_CNTL_G; \ + uint32_t CM_RGAM_RAMA_SLOPE_CNTL_R; \ + uint32_t CM_RGAM_RAMA_END_CNTL1_B; \ + uint32_t CM_RGAM_RAMA_END_CNTL2_B; \ + uint32_t CM_RGAM_RAMA_END_CNTL1_G; \ + uint32_t CM_RGAM_RAMA_END_CNTL2_G; \ + uint32_t CM_RGAM_RAMA_END_CNTL1_R; \ + uint32_t CM_RGAM_RAMA_END_CNTL2_R; \ + uint32_t CM_RGAM_RAMA_REGION_0_1; \ + uint32_t CM_RGAM_RAMA_REGION_32_33; \ + uint32_t CM_RGAM_CONTROL; \ + uint32_t CM_CMOUT_CONTROL; \ + uint32_t CM_BLNDGAM_LUT_WRITE_EN_MASK; \ + uint32_t CM_BLNDGAM_CONTROL; \ + uint32_t CM_BLNDGAM_RAMB_START_CNTL_B; \ + uint32_t CM_BLNDGAM_RAMB_START_CNTL_G; \ + uint32_t CM_BLNDGAM_RAMB_START_CNTL_R; \ + uint32_t CM_BLNDGAM_RAMB_SLOPE_CNTL_B; \ + uint32_t CM_BLNDGAM_RAMB_SLOPE_CNTL_G; \ + uint32_t CM_BLNDGAM_RAMB_SLOPE_CNTL_R; \ + uint32_t CM_BLNDGAM_RAMB_END_CNTL1_B; \ + uint32_t CM_BLNDGAM_RAMB_END_CNTL2_B; \ + uint32_t CM_BLNDGAM_RAMB_END_CNTL1_G; \ + uint32_t CM_BLNDGAM_RAMB_END_CNTL2_G; \ + uint32_t CM_BLNDGAM_RAMB_END_CNTL1_R; \ + uint32_t CM_BLNDGAM_RAMB_END_CNTL2_R; \ + uint32_t CM_BLNDGAM_RAMB_REGION_0_1; \ + uint32_t CM_BLNDGAM_RAMB_REGION_2_3; \ + uint32_t CM_BLNDGAM_RAMB_REGION_4_5; \ + uint32_t CM_BLNDGAM_RAMB_REGION_6_7; \ + uint32_t CM_BLNDGAM_RAMB_REGION_8_9; \ + uint32_t CM_BLNDGAM_RAMB_REGION_10_11; \ + uint32_t CM_BLNDGAM_RAMB_REGION_12_13; \ + uint32_t CM_BLNDGAM_RAMB_REGION_14_15; \ + uint32_t CM_BLNDGAM_RAMB_REGION_16_17; \ + uint32_t CM_BLNDGAM_RAMB_REGION_18_19; \ + uint32_t CM_BLNDGAM_RAMB_REGION_20_21; \ + uint32_t CM_BLNDGAM_RAMB_REGION_22_23; \ + uint32_t CM_BLNDGAM_RAMB_REGION_24_25; \ + uint32_t CM_BLNDGAM_RAMB_REGION_26_27; \ + uint32_t CM_BLNDGAM_RAMB_REGION_28_29; \ + uint32_t CM_BLNDGAM_RAMB_REGION_30_31; \ + uint32_t CM_BLNDGAM_RAMB_REGION_32_33; \ + uint32_t CM_BLNDGAM_RAMA_START_CNTL_B; \ + uint32_t CM_BLNDGAM_RAMA_START_CNTL_G; \ + uint32_t CM_BLNDGAM_RAMA_START_CNTL_R; \ + uint32_t CM_BLNDGAM_RAMA_SLOPE_CNTL_B; \ + uint32_t CM_BLNDGAM_RAMA_SLOPE_CNTL_G; \ + uint32_t CM_BLNDGAM_RAMA_SLOPE_CNTL_R; \ + uint32_t CM_BLNDGAM_RAMA_END_CNTL1_B; \ + uint32_t CM_BLNDGAM_RAMA_END_CNTL2_B; \ + uint32_t CM_BLNDGAM_RAMA_END_CNTL1_G; \ + uint32_t CM_BLNDGAM_RAMA_END_CNTL2_G; \ + uint32_t CM_BLNDGAM_RAMA_END_CNTL1_R; \ + uint32_t CM_BLNDGAM_RAMA_END_CNTL2_R; \ + uint32_t CM_BLNDGAM_RAMA_REGION_0_1; \ + uint32_t CM_BLNDGAM_RAMA_REGION_2_3; \ + uint32_t CM_BLNDGAM_RAMA_REGION_4_5; \ + uint32_t CM_BLNDGAM_RAMA_REGION_6_7; \ + uint32_t CM_BLNDGAM_RAMA_REGION_8_9; \ + uint32_t CM_BLNDGAM_RAMA_REGION_10_11; \ + uint32_t CM_BLNDGAM_RAMA_REGION_12_13; \ + uint32_t CM_BLNDGAM_RAMA_REGION_14_15; \ + uint32_t CM_BLNDGAM_RAMA_REGION_16_17; \ + uint32_t CM_BLNDGAM_RAMA_REGION_18_19; \ + uint32_t CM_BLNDGAM_RAMA_REGION_20_21; \ + uint32_t CM_BLNDGAM_RAMA_REGION_22_23; \ + uint32_t CM_BLNDGAM_RAMA_REGION_24_25; \ + uint32_t CM_BLNDGAM_RAMA_REGION_26_27; \ + uint32_t CM_BLNDGAM_RAMA_REGION_28_29; \ + uint32_t CM_BLNDGAM_RAMA_REGION_30_31; \ + uint32_t CM_BLNDGAM_RAMA_REGION_32_33; \ + uint32_t CM_BLNDGAM_LUT_INDEX; \ + uint32_t CM_3DLUT_MODE; \ + uint32_t CM_3DLUT_INDEX; \ + uint32_t CM_3DLUT_DATA; \ + uint32_t CM_3DLUT_DATA_30BIT; \ + uint32_t CM_3DLUT_READ_WRITE_CONTROL; \ + uint32_t CM_SHAPER_LUT_WRITE_EN_MASK; \ + uint32_t CM_SHAPER_CONTROL; \ + uint32_t CM_SHAPER_RAMB_START_CNTL_B; \ + uint32_t CM_SHAPER_RAMB_START_CNTL_G; \ + uint32_t CM_SHAPER_RAMB_START_CNTL_R; \ + uint32_t CM_SHAPER_RAMB_END_CNTL_B; \ + uint32_t CM_SHAPER_RAMB_END_CNTL_G; \ + uint32_t CM_SHAPER_RAMB_END_CNTL_R; \ + uint32_t CM_SHAPER_RAMB_REGION_0_1; \ + uint32_t CM_SHAPER_RAMB_REGION_2_3; \ + uint32_t CM_SHAPER_RAMB_REGION_4_5; \ + uint32_t CM_SHAPER_RAMB_REGION_6_7; \ + uint32_t CM_SHAPER_RAMB_REGION_8_9; \ + uint32_t CM_SHAPER_RAMB_REGION_10_11; \ + uint32_t CM_SHAPER_RAMB_REGION_12_13; \ + uint32_t CM_SHAPER_RAMB_REGION_14_15; \ + uint32_t CM_SHAPER_RAMB_REGION_16_17; \ + uint32_t CM_SHAPER_RAMB_REGION_18_19; \ + uint32_t CM_SHAPER_RAMB_REGION_20_21; \ + uint32_t CM_SHAPER_RAMB_REGION_22_23; \ + uint32_t CM_SHAPER_RAMB_REGION_24_25; \ + uint32_t CM_SHAPER_RAMB_REGION_26_27; \ + uint32_t CM_SHAPER_RAMB_REGION_28_29; \ + uint32_t CM_SHAPER_RAMB_REGION_30_31; \ + uint32_t CM_SHAPER_RAMB_REGION_32_33; \ + uint32_t CM_SHAPER_RAMA_START_CNTL_B; \ + uint32_t CM_SHAPER_RAMA_START_CNTL_G; \ + uint32_t CM_SHAPER_RAMA_START_CNTL_R; \ + uint32_t CM_SHAPER_RAMA_END_CNTL_B; \ + uint32_t CM_SHAPER_RAMA_END_CNTL_G; \ + uint32_t CM_SHAPER_RAMA_END_CNTL_R; \ + uint32_t CM_SHAPER_RAMA_REGION_0_1; \ + uint32_t CM_SHAPER_RAMA_REGION_2_3; \ + uint32_t CM_SHAPER_RAMA_REGION_4_5; \ + uint32_t CM_SHAPER_RAMA_REGION_6_7; \ + uint32_t CM_SHAPER_RAMA_REGION_8_9; \ + uint32_t CM_SHAPER_RAMA_REGION_10_11; \ + uint32_t CM_SHAPER_RAMA_REGION_12_13; \ + uint32_t CM_SHAPER_RAMA_REGION_14_15; \ + uint32_t CM_SHAPER_RAMA_REGION_16_17; \ + uint32_t CM_SHAPER_RAMA_REGION_18_19; \ + uint32_t CM_SHAPER_RAMA_REGION_20_21; \ + uint32_t CM_SHAPER_RAMA_REGION_22_23; \ + uint32_t CM_SHAPER_RAMA_REGION_24_25; \ + uint32_t CM_SHAPER_RAMA_REGION_26_27; \ + uint32_t CM_SHAPER_RAMA_REGION_28_29; \ + uint32_t CM_SHAPER_RAMA_REGION_30_31; \ + uint32_t CM_SHAPER_RAMA_REGION_32_33; \ + uint32_t CM_SHAPER_LUT_INDEX; \ + uint32_t CM_SHAPER_LUT_DATA; \ + uint32_t CM_ICSC_CONTROL; \ + uint32_t CM_ICSC_C11_C12; \ + uint32_t CM_ICSC_C33_C34; \ + uint32_t CM_BNS_VALUES_R; \ + uint32_t CM_BNS_VALUES_G; \ + uint32_t CM_BNS_VALUES_B; \ + uint32_t CM_DGAM_RAMB_START_CNTL_B; \ + uint32_t CM_DGAM_RAMB_START_CNTL_G; \ + uint32_t CM_DGAM_RAMB_START_CNTL_R; \ + uint32_t CM_DGAM_RAMB_SLOPE_CNTL_B; \ + uint32_t CM_DGAM_RAMB_SLOPE_CNTL_G; \ + uint32_t CM_DGAM_RAMB_SLOPE_CNTL_R; \ + uint32_t CM_DGAM_RAMB_END_CNTL1_B; \ + uint32_t CM_DGAM_RAMB_END_CNTL2_B; \ + uint32_t CM_DGAM_RAMB_END_CNTL1_G; \ + uint32_t CM_DGAM_RAMB_END_CNTL2_G; \ + uint32_t CM_DGAM_RAMB_END_CNTL1_R; \ + uint32_t CM_DGAM_RAMB_END_CNTL2_R; \ + uint32_t CM_DGAM_RAMB_REGION_0_1; \ + uint32_t CM_DGAM_RAMB_REGION_14_15; \ + uint32_t CM_DGAM_RAMA_START_CNTL_B; \ + uint32_t CM_DGAM_RAMA_START_CNTL_G; \ + uint32_t CM_DGAM_RAMA_START_CNTL_R; \ + uint32_t CM_DGAM_RAMA_SLOPE_CNTL_B; \ + uint32_t CM_DGAM_RAMA_SLOPE_CNTL_G; \ + uint32_t CM_DGAM_RAMA_SLOPE_CNTL_R; \ + uint32_t CM_DGAM_RAMA_END_CNTL1_B; \ + uint32_t CM_DGAM_RAMA_END_CNTL2_B; \ + uint32_t CM_DGAM_RAMA_END_CNTL1_G; \ + uint32_t CM_DGAM_RAMA_END_CNTL2_G; \ + uint32_t CM_DGAM_RAMA_END_CNTL1_R; \ + uint32_t CM_DGAM_RAMA_END_CNTL2_R; \ + uint32_t CM_DGAM_RAMA_REGION_0_1; \ + uint32_t CM_DGAM_RAMA_REGION_14_15; \ + uint32_t CM_DGAM_LUT_WRITE_EN_MASK; \ + uint32_t CM_DGAM_LUT_INDEX; \ + uint32_t CM_DGAM_LUT_DATA; \ + uint32_t CM_CONTROL; \ + uint32_t CM_DGAM_CONTROL; \ + uint32_t CM_IGAM_CONTROL; \ + uint32_t CM_IGAM_LUT_RW_CONTROL; \ + uint32_t CM_IGAM_LUT_RW_INDEX; \ + uint32_t CM_IGAM_LUT_SEQ_COLOR; \ + uint32_t FORMAT_CONTROL; \ + uint32_t CNVC_SURFACE_PIXEL_FORMAT; \ + uint32_t CURSOR_CONTROL; \ + uint32_t CURSOR0_CONTROL; \ + uint32_t CURSOR0_COLOR0; \ + uint32_t CURSOR0_COLOR1; struct dcn_dpp_registers { - uint32_t DSCL_EXT_OVERSCAN_LEFT_RIGHT; - uint32_t DSCL_EXT_OVERSCAN_TOP_BOTTOM; - uint32_t OTG_H_BLANK; - uint32_t OTG_V_BLANK; - uint32_t SCL_MODE; - uint32_t LB_DATA_FORMAT; - uint32_t LB_MEMORY_CTRL; - uint32_t DSCL_AUTOCAL; - uint32_t SCL_BLACK_OFFSET; - uint32_t SCL_TAP_CONTROL; - uint32_t SCL_COEF_RAM_TAP_SELECT; - uint32_t SCL_COEF_RAM_TAP_DATA; - uint32_t DSCL_2TAP_CONTROL; - uint32_t MPC_SIZE; - uint32_t SCL_HORZ_FILTER_SCALE_RATIO; - uint32_t SCL_VERT_FILTER_SCALE_RATIO; - uint32_t SCL_HORZ_FILTER_SCALE_RATIO_C; - uint32_t SCL_VERT_FILTER_SCALE_RATIO_C; - uint32_t SCL_HORZ_FILTER_INIT; - uint32_t SCL_HORZ_FILTER_INIT_C; - uint32_t SCL_VERT_FILTER_INIT; - uint32_t SCL_VERT_FILTER_INIT_BOT; - uint32_t SCL_VERT_FILTER_INIT_C; - uint32_t SCL_VERT_FILTER_INIT_BOT_C; - uint32_t RECOUT_START; - uint32_t RECOUT_SIZE; - uint32_t CM_GAMUT_REMAP_CONTROL; - uint32_t CM_GAMUT_REMAP_C11_C12; - uint32_t CM_GAMUT_REMAP_C33_C34; - uint32_t CM_COMA_C11_C12; - uint32_t CM_COMA_C33_C34; - uint32_t CM_COMB_C11_C12; - uint32_t CM_COMB_C33_C34; - uint32_t CM_OCSC_CONTROL; - uint32_t CM_OCSC_C11_C12; - uint32_t CM_OCSC_C33_C34; - uint32_t CM_MEM_PWR_CTRL; - uint32_t CM_RGAM_LUT_DATA; - uint32_t CM_RGAM_LUT_WRITE_EN_MASK; - uint32_t CM_RGAM_LUT_INDEX; - uint32_t CM_RGAM_RAMB_START_CNTL_B; - uint32_t CM_RGAM_RAMB_START_CNTL_G; - uint32_t CM_RGAM_RAMB_START_CNTL_R; - uint32_t CM_RGAM_RAMB_SLOPE_CNTL_B; - uint32_t CM_RGAM_RAMB_SLOPE_CNTL_G; - uint32_t CM_RGAM_RAMB_SLOPE_CNTL_R; - uint32_t CM_RGAM_RAMB_END_CNTL1_B; - uint32_t CM_RGAM_RAMB_END_CNTL2_B; - uint32_t CM_RGAM_RAMB_END_CNTL1_G; - uint32_t CM_RGAM_RAMB_END_CNTL2_G; - uint32_t CM_RGAM_RAMB_END_CNTL1_R; - uint32_t CM_RGAM_RAMB_END_CNTL2_R; - uint32_t CM_RGAM_RAMB_REGION_0_1; - uint32_t CM_RGAM_RAMB_REGION_32_33; - uint32_t CM_RGAM_RAMA_START_CNTL_B; - uint32_t CM_RGAM_RAMA_START_CNTL_G; - uint32_t CM_RGAM_RAMA_START_CNTL_R; - uint32_t CM_RGAM_RAMA_SLOPE_CNTL_B; - uint32_t CM_RGAM_RAMA_SLOPE_CNTL_G; - uint32_t CM_RGAM_RAMA_SLOPE_CNTL_R; - uint32_t CM_RGAM_RAMA_END_CNTL1_B; - uint32_t CM_RGAM_RAMA_END_CNTL2_B; - uint32_t CM_RGAM_RAMA_END_CNTL1_G; - uint32_t CM_RGAM_RAMA_END_CNTL2_G; - uint32_t CM_RGAM_RAMA_END_CNTL1_R; - uint32_t CM_RGAM_RAMA_END_CNTL2_R; - uint32_t CM_RGAM_RAMA_REGION_0_1; - uint32_t CM_RGAM_RAMA_REGION_32_33; - uint32_t CM_RGAM_CONTROL; - uint32_t CM_CMOUT_CONTROL; - uint32_t CM_BLNDGAM_LUT_WRITE_EN_MASK; - uint32_t CM_BLNDGAM_CONTROL; - uint32_t CM_BLNDGAM_RAMB_START_CNTL_B; - uint32_t CM_BLNDGAM_RAMB_START_CNTL_G; - uint32_t CM_BLNDGAM_RAMB_START_CNTL_R; - uint32_t CM_BLNDGAM_RAMB_SLOPE_CNTL_B; - uint32_t CM_BLNDGAM_RAMB_SLOPE_CNTL_G; - uint32_t CM_BLNDGAM_RAMB_SLOPE_CNTL_R; - uint32_t CM_BLNDGAM_RAMB_END_CNTL1_B; - uint32_t CM_BLNDGAM_RAMB_END_CNTL2_B; - uint32_t CM_BLNDGAM_RAMB_END_CNTL1_G; - uint32_t CM_BLNDGAM_RAMB_END_CNTL2_G; - uint32_t CM_BLNDGAM_RAMB_END_CNTL1_R; - uint32_t CM_BLNDGAM_RAMB_END_CNTL2_R; - uint32_t CM_BLNDGAM_RAMB_REGION_0_1; - uint32_t CM_BLNDGAM_RAMB_REGION_2_3; - uint32_t CM_BLNDGAM_RAMB_REGION_4_5; - uint32_t CM_BLNDGAM_RAMB_REGION_6_7; - uint32_t CM_BLNDGAM_RAMB_REGION_8_9; - uint32_t CM_BLNDGAM_RAMB_REGION_10_11; - uint32_t CM_BLNDGAM_RAMB_REGION_12_13; - uint32_t CM_BLNDGAM_RAMB_REGION_14_15; - uint32_t CM_BLNDGAM_RAMB_REGION_16_17; - uint32_t CM_BLNDGAM_RAMB_REGION_18_19; - uint32_t CM_BLNDGAM_RAMB_REGION_20_21; - uint32_t CM_BLNDGAM_RAMB_REGION_22_23; - uint32_t CM_BLNDGAM_RAMB_REGION_24_25; - uint32_t CM_BLNDGAM_RAMB_REGION_26_27; - uint32_t CM_BLNDGAM_RAMB_REGION_28_29; - uint32_t CM_BLNDGAM_RAMB_REGION_30_31; - uint32_t CM_BLNDGAM_RAMB_REGION_32_33; - uint32_t CM_BLNDGAM_RAMA_START_CNTL_B; - uint32_t CM_BLNDGAM_RAMA_START_CNTL_G; - uint32_t CM_BLNDGAM_RAMA_START_CNTL_R; - uint32_t CM_BLNDGAM_RAMA_SLOPE_CNTL_B; - uint32_t CM_BLNDGAM_RAMA_SLOPE_CNTL_G; - uint32_t CM_BLNDGAM_RAMA_SLOPE_CNTL_R; - uint32_t CM_BLNDGAM_RAMA_END_CNTL1_B; - uint32_t CM_BLNDGAM_RAMA_END_CNTL2_B; - uint32_t CM_BLNDGAM_RAMA_END_CNTL1_G; - uint32_t CM_BLNDGAM_RAMA_END_CNTL2_G; - uint32_t CM_BLNDGAM_RAMA_END_CNTL1_R; - uint32_t CM_BLNDGAM_RAMA_END_CNTL2_R; - uint32_t CM_BLNDGAM_RAMA_REGION_0_1; - uint32_t CM_BLNDGAM_RAMA_REGION_2_3; - uint32_t CM_BLNDGAM_RAMA_REGION_4_5; - uint32_t CM_BLNDGAM_RAMA_REGION_6_7; - uint32_t CM_BLNDGAM_RAMA_REGION_8_9; - uint32_t CM_BLNDGAM_RAMA_REGION_10_11; - uint32_t CM_BLNDGAM_RAMA_REGION_12_13; - uint32_t CM_BLNDGAM_RAMA_REGION_14_15; - uint32_t CM_BLNDGAM_RAMA_REGION_16_17; - uint32_t CM_BLNDGAM_RAMA_REGION_18_19; - uint32_t CM_BLNDGAM_RAMA_REGION_20_21; - uint32_t CM_BLNDGAM_RAMA_REGION_22_23; - uint32_t CM_BLNDGAM_RAMA_REGION_24_25; - uint32_t CM_BLNDGAM_RAMA_REGION_26_27; - uint32_t CM_BLNDGAM_RAMA_REGION_28_29; - uint32_t CM_BLNDGAM_RAMA_REGION_30_31; - uint32_t CM_BLNDGAM_RAMA_REGION_32_33; - uint32_t CM_BLNDGAM_LUT_INDEX; - uint32_t CM_BLNDGAM_LUT_DATA; - uint32_t CM_3DLUT_MODE; - uint32_t CM_3DLUT_INDEX; - uint32_t CM_3DLUT_DATA; - uint32_t CM_3DLUT_DATA_30BIT; - uint32_t CM_3DLUT_READ_WRITE_CONTROL; - uint32_t CM_SHAPER_LUT_WRITE_EN_MASK; - uint32_t CM_SHAPER_CONTROL; - uint32_t CM_SHAPER_RAMB_START_CNTL_B; - uint32_t CM_SHAPER_RAMB_START_CNTL_G; - uint32_t CM_SHAPER_RAMB_START_CNTL_R; - uint32_t CM_SHAPER_RAMB_END_CNTL_B; - uint32_t CM_SHAPER_RAMB_END_CNTL_G; - uint32_t CM_SHAPER_RAMB_END_CNTL_R; - uint32_t CM_SHAPER_RAMB_REGION_0_1; - uint32_t CM_SHAPER_RAMB_REGION_2_3; - uint32_t CM_SHAPER_RAMB_REGION_4_5; - uint32_t CM_SHAPER_RAMB_REGION_6_7; - uint32_t CM_SHAPER_RAMB_REGION_8_9; - uint32_t CM_SHAPER_RAMB_REGION_10_11; - uint32_t CM_SHAPER_RAMB_REGION_12_13; - uint32_t CM_SHAPER_RAMB_REGION_14_15; - uint32_t CM_SHAPER_RAMB_REGION_16_17; - uint32_t CM_SHAPER_RAMB_REGION_18_19; - uint32_t CM_SHAPER_RAMB_REGION_20_21; - uint32_t CM_SHAPER_RAMB_REGION_22_23; - uint32_t CM_SHAPER_RAMB_REGION_24_25; - uint32_t CM_SHAPER_RAMB_REGION_26_27; - uint32_t CM_SHAPER_RAMB_REGION_28_29; - uint32_t CM_SHAPER_RAMB_REGION_30_31; - uint32_t CM_SHAPER_RAMB_REGION_32_33; - uint32_t CM_SHAPER_RAMA_START_CNTL_B; - uint32_t CM_SHAPER_RAMA_START_CNTL_G; - uint32_t CM_SHAPER_RAMA_START_CNTL_R; - uint32_t CM_SHAPER_RAMA_END_CNTL_B; - uint32_t CM_SHAPER_RAMA_END_CNTL_G; - uint32_t CM_SHAPER_RAMA_END_CNTL_R; - uint32_t CM_SHAPER_RAMA_REGION_0_1; - uint32_t CM_SHAPER_RAMA_REGION_2_3; - uint32_t CM_SHAPER_RAMA_REGION_4_5; - uint32_t CM_SHAPER_RAMA_REGION_6_7; - uint32_t CM_SHAPER_RAMA_REGION_8_9; - uint32_t CM_SHAPER_RAMA_REGION_10_11; - uint32_t CM_SHAPER_RAMA_REGION_12_13; - uint32_t CM_SHAPER_RAMA_REGION_14_15; - uint32_t CM_SHAPER_RAMA_REGION_16_17; - uint32_t CM_SHAPER_RAMA_REGION_18_19; - uint32_t CM_SHAPER_RAMA_REGION_20_21; - uint32_t CM_SHAPER_RAMA_REGION_22_23; - uint32_t CM_SHAPER_RAMA_REGION_24_25; - uint32_t CM_SHAPER_RAMA_REGION_26_27; - uint32_t CM_SHAPER_RAMA_REGION_28_29; - uint32_t CM_SHAPER_RAMA_REGION_30_31; - uint32_t CM_SHAPER_RAMA_REGION_32_33; - uint32_t CM_SHAPER_LUT_INDEX; - uint32_t CM_SHAPER_LUT_DATA; - uint32_t CM_ICSC_CONTROL; - uint32_t CM_ICSC_C11_C12; - uint32_t CM_ICSC_C33_C34; - uint32_t CM_BNS_VALUES_R; - uint32_t CM_BNS_VALUES_G; - uint32_t CM_BNS_VALUES_B; - uint32_t CM_DGAM_RAMB_START_CNTL_B; - uint32_t CM_DGAM_RAMB_START_CNTL_G; - uint32_t CM_DGAM_RAMB_START_CNTL_R; - uint32_t CM_DGAM_RAMB_SLOPE_CNTL_B; - uint32_t CM_DGAM_RAMB_SLOPE_CNTL_G; - uint32_t CM_DGAM_RAMB_SLOPE_CNTL_R; - uint32_t CM_DGAM_RAMB_END_CNTL1_B; - uint32_t CM_DGAM_RAMB_END_CNTL2_B; - uint32_t CM_DGAM_RAMB_END_CNTL1_G; - uint32_t CM_DGAM_RAMB_END_CNTL2_G; - uint32_t CM_DGAM_RAMB_END_CNTL1_R; - uint32_t CM_DGAM_RAMB_END_CNTL2_R; - uint32_t CM_DGAM_RAMB_REGION_0_1; - uint32_t CM_DGAM_RAMB_REGION_14_15; - uint32_t CM_DGAM_RAMA_START_CNTL_B; - uint32_t CM_DGAM_RAMA_START_CNTL_G; - uint32_t CM_DGAM_RAMA_START_CNTL_R; - uint32_t CM_DGAM_RAMA_SLOPE_CNTL_B; - uint32_t CM_DGAM_RAMA_SLOPE_CNTL_G; - uint32_t CM_DGAM_RAMA_SLOPE_CNTL_R; - uint32_t CM_DGAM_RAMA_END_CNTL1_B; - uint32_t CM_DGAM_RAMA_END_CNTL2_B; - uint32_t CM_DGAM_RAMA_END_CNTL1_G; - uint32_t CM_DGAM_RAMA_END_CNTL2_G; - uint32_t CM_DGAM_RAMA_END_CNTL1_R; - uint32_t CM_DGAM_RAMA_END_CNTL2_R; - uint32_t CM_DGAM_RAMA_REGION_0_1; - uint32_t CM_DGAM_RAMA_REGION_14_15; - uint32_t CM_DGAM_LUT_WRITE_EN_MASK; - uint32_t CM_DGAM_LUT_INDEX; - uint32_t CM_DGAM_LUT_DATA; - uint32_t CM_CONTROL; - uint32_t CM_DGAM_CONTROL; - uint32_t CM_IGAM_CONTROL; - uint32_t CM_IGAM_LUT_RW_CONTROL; - uint32_t CM_IGAM_LUT_RW_INDEX; - uint32_t CM_IGAM_LUT_SEQ_COLOR; - uint32_t FORMAT_CONTROL; - uint32_t CNVC_SURFACE_PIXEL_FORMAT; - uint32_t CURSOR_CONTROL; - uint32_t CURSOR0_CONTROL; - uint32_t CURSOR0_COLOR0; - uint32_t CURSOR0_COLOR1; + DPP_COMMON_REG_VARIABLE_LIST }; struct dcn10_dpp { @@ -1284,6 +1283,10 @@ enum dcn10_input_csc_select { INPUT_CSC_SELECT_COMA }; +void dpp1_set_cursor_attributes( + struct dpp *dpp_base, + enum dc_cursor_color_format color_format); + bool dpp1_dscl_is_lb_conf_valid( int ceil_vratio, int num_partitions, @@ -1371,7 +1374,7 @@ void dpp1_cm_program_regamma_lutb_settings( const struct pwl_params *params); void dpp1_cm_set_output_csc_adjustment( struct dpp *dpp_base, - const struct out_csc_color_matrix *tbl_entry); + const uint16_t *regval); void dpp1_cm_set_output_csc_default( struct dpp *dpp_base, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c index 4c90043e7b8c9bf919ee2c8f8667ebe95c4731b8..a5b099023652a4ecfc32fdae06d1045bf2fd650c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c @@ -49,6 +49,8 @@ #define FN(reg_name, field_name) \ dpp->tf_shift->field_name, dpp->tf_mask->field_name +#define NUM_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) + struct dcn10_input_csc_matrix { enum dc_color_space color_space; uint16_t regval[12]; @@ -223,18 +225,18 @@ void dpp1_cm_set_gamut_remap( static void dpp1_cm_program_color_matrix( struct dcn10_dpp *dpp, - const struct out_csc_color_matrix *tbl_entry) + const uint16_t *regval) { uint32_t mode; struct color_matrices_reg gam_regs; REG_GET(CM_OCSC_CONTROL, CM_OCSC_MODE, &mode); - if (tbl_entry == NULL) { + if (regval == NULL) { BREAK_TO_DEBUGGER(); return; } - + mode = 4; gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_OCSC_C11; gam_regs.masks.csc_c11 = dpp->tf_mask->CM_OCSC_C11; gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_OCSC_C12; @@ -247,7 +249,7 @@ static void dpp1_cm_program_color_matrix( cm_helper_program_color_matrices( dpp->base.ctx, - tbl_entry->regval, + regval, &gam_regs); } else { @@ -257,7 +259,7 @@ static void dpp1_cm_program_color_matrix( cm_helper_program_color_matrices( dpp->base.ctx, - tbl_entry->regval, + regval, &gam_regs); } } @@ -266,24 +268,18 @@ void dpp1_cm_set_output_csc_default( struct dpp *dpp_base, enum dc_color_space colorspace) { - struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); - struct out_csc_color_matrix tbl_entry; - int i, j; - int arr_size = sizeof(output_csc_matrix) / sizeof(struct output_csc_matrix); + const uint16_t *regval = NULL; + int arr_size; uint32_t ocsc_mode = 4; - tbl_entry.color_space = colorspace; - - for (i = 0; i < arr_size; i++) - if (output_csc_matrix[i].color_space == colorspace) { - for (j = 0; j < 12; j++) - tbl_entry.regval[j] = output_csc_matrix[i].regval[j]; - break; - } - + regval = find_color_matrix(colorspace, &arr_size); + if (regval == NULL) { + BREAK_TO_DEBUGGER(); + return; + } + dpp1_cm_program_color_matrix(dpp, regval); REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode); - dpp1_cm_program_color_matrix(dpp, &tbl_entry); } static void dpp1_cm_get_reg_field( @@ -315,41 +311,12 @@ static void dpp1_cm_get_reg_field( void dpp1_cm_set_output_csc_adjustment( struct dpp *dpp_base, - const struct out_csc_color_matrix *tbl_entry) + const uint16_t *regval) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); - //enum csc_color_mode config = CSC_COLOR_MODE_GRAPHICS_OUTPUT_CSC; uint32_t ocsc_mode = 4; - - /** - *if (tbl_entry != NULL) { - * switch (tbl_entry->color_space) { - * case COLOR_SPACE_SRGB: - * case COLOR_SPACE_2020_RGB_FULLRANGE: - * ocsc_mode = 0; - * break; - * case COLOR_SPACE_SRGB_LIMITED: - * case COLOR_SPACE_2020_RGB_LIMITEDRANGE: - * ocsc_mode = 1; - * break; - * case COLOR_SPACE_YCBCR601: - * case COLOR_SPACE_YCBCR601_LIMITED: - * ocsc_mode = 2; - * break; - * case COLOR_SPACE_YCBCR709: - * case COLOR_SPACE_YCBCR709_LIMITED: - * case COLOR_SPACE_2020_YCBCR: - * ocsc_mode = 3; - * break; - * case COLOR_SPACE_UNKNOWN: - * default: - * break; - * } - *} - */ - + dpp1_cm_program_color_matrix(dpp, regval); REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode); - dpp1_cm_program_color_matrix(dpp, tbl_entry); } void dpp1_cm_power_on_regamma_lut(struct dpp *dpp_base, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 584e82cc5df364a12ce9cecb7330e8b0387b9fa9..585b33384002364d031372adeab0918238821ec8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -48,9 +48,20 @@ void hubp1_set_blank(struct hubp *hubp, bool blank) HUBP_TTU_DISABLE, blank_en); if (blank) { - REG_WAIT(DCHUBP_CNTL, - HUBP_NO_OUTSTANDING_REQ, 1, - 1, 200); + uint32_t reg_val = REG_READ(DCHUBP_CNTL); + + if (reg_val) { + /* init sequence workaround: in case HUBP is + * power gated, this wait would timeout. + * + * we just wrote reg_val to non-0, if it stay 0 + * it means HUBP is gated + */ + REG_WAIT(DCHUBP_CNTL, + HUBP_NO_OUTSTANDING_REQ, 1, + 1, 200); + } + hubp->mpcc_id = 0xf; hubp->opp_id = 0xf; } @@ -96,10 +107,12 @@ static void hubp1_vready_workaround(struct hubp *hubp, } void hubp1_program_tiling( - struct dcn10_hubp *hubp1, + struct hubp *hubp, const union dc_tiling_info *info, const enum surface_pixel_format pixel_format) { + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + REG_UPDATE_6(DCSURF_ADDR_CONFIG, NUM_PIPES, log_2(info->gfx9.num_pipes), NUM_BANKS, log_2(info->gfx9.num_banks), @@ -116,13 +129,14 @@ void hubp1_program_tiling( } void hubp1_program_size_and_rotation( - struct dcn10_hubp *hubp1, + struct hubp *hubp, enum dc_rotation_angle rotation, enum surface_pixel_format format, const union plane_size *plane_size, struct dc_plane_dcc_param *dcc, bool horizontal_mirror) { + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); uint32_t pitch, meta_pitch, pitch_c, meta_pitch_c, mirror; /* Program data and meta surface pitch (calculation from addrlib) @@ -178,9 +192,10 @@ void hubp1_program_size_and_rotation( } void hubp1_program_pixel_format( - struct dcn10_hubp *hubp1, + struct hubp *hubp, enum surface_pixel_format format) { + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); uint32_t red_bar = 3; uint32_t blue_bar = 2; @@ -424,13 +439,11 @@ void hubp1_program_surface_config( struct dc_plane_dcc_param *dcc, bool horizontal_mirror) { - struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); - hubp1_dcc_control(hubp, dcc->enable, dcc->grph.independent_64b_blks); - hubp1_program_tiling(hubp1, tiling_info, format); + hubp1_program_tiling(hubp, tiling_info, format); hubp1_program_size_and_rotation( - hubp1, rotation, format, plane_size, dcc, horizontal_mirror); - hubp1_program_pixel_format(hubp1, format); + hubp, rotation, format, plane_size, dcc, horizontal_mirror); + hubp1_program_pixel_format(hubp, format); } void hubp1_program_requestor( @@ -765,42 +778,7 @@ void hubp1_read_state(struct dcn10_hubp *hubp1, QoS_LEVEL_HIGH_WM, &s->qos_level_high_wm); } -enum cursor_pitch { - CURSOR_PITCH_64_PIXELS = 0, - CURSOR_PITCH_128_PIXELS, - CURSOR_PITCH_256_PIXELS -}; - -enum cursor_lines_per_chunk { - CURSOR_LINE_PER_CHUNK_2 = 1, - CURSOR_LINE_PER_CHUNK_4, - CURSOR_LINE_PER_CHUNK_8, - CURSOR_LINE_PER_CHUNK_16 -}; - -static bool ippn10_cursor_program_control( - struct dcn10_hubp *hubp1, - bool pixel_data_invert, - enum dc_cursor_color_format color_format) -{ - if (REG(CURSOR_SETTINS)) - REG_SET_2(CURSOR_SETTINS, 0, - /* no shift of the cursor HDL schedule */ - CURSOR0_DST_Y_OFFSET, 0, - /* used to shift the cursor chunk request deadline */ - CURSOR0_CHUNK_HDL_ADJUST, 3); - else - REG_SET_2(CURSOR_SETTINGS, 0, - /* no shift of the cursor HDL schedule */ - CURSOR0_DST_Y_OFFSET, 0, - /* used to shift the cursor chunk request deadline */ - CURSOR0_CHUNK_HDL_ADJUST, 3); - - return true; -} - -static enum cursor_pitch ippn10_get_cursor_pitch( - unsigned int pitch) +enum cursor_pitch hubp1_get_cursor_pitch(unsigned int pitch) { enum cursor_pitch hw_pitch; @@ -823,7 +801,7 @@ static enum cursor_pitch ippn10_get_cursor_pitch( return hw_pitch; } -static enum cursor_lines_per_chunk ippn10_get_lines_per_chunk( +static enum cursor_lines_per_chunk hubp1_get_lines_per_chunk( unsigned int cur_width, enum dc_cursor_color_format format) { @@ -849,8 +827,8 @@ void hubp1_cursor_set_attributes( const struct dc_cursor_attributes *attr) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); - enum cursor_pitch hw_pitch = ippn10_get_cursor_pitch(attr->pitch); - enum cursor_lines_per_chunk lpc = ippn10_get_lines_per_chunk( + enum cursor_pitch hw_pitch = hubp1_get_cursor_pitch(attr->pitch); + enum cursor_lines_per_chunk lpc = hubp1_get_lines_per_chunk( attr->width, attr->color_format); hubp->curs_attr = *attr; @@ -863,13 +841,17 @@ void hubp1_cursor_set_attributes( REG_UPDATE_2(CURSOR_SIZE, CURSOR_WIDTH, attr->width, CURSOR_HEIGHT, attr->height); + REG_UPDATE_3(CURSOR_CONTROL, CURSOR_MODE, attr->color_format, CURSOR_PITCH, hw_pitch, CURSOR_LINES_PER_CHUNK, lpc); - ippn10_cursor_program_control(hubp1, - attr->attribute_flags.bits.INVERT_PIXEL_DATA, - attr->color_format); + + REG_SET_2(CURSOR_SETTINS, 0, + /* no shift of the cursor HDL schedule */ + CURSOR0_DST_Y_OFFSET, 0, + /* used to shift the cursor chunk request deadline */ + CURSOR0_CHUNK_HDL_ADJUST, 3); } void hubp1_cursor_set_position( @@ -909,7 +891,8 @@ void hubp1_cursor_set_position( cur_en = 0; /* not visible beyond left edge*/ if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0) - hubp1_cursor_set_attributes(hubp, &hubp->curs_attr); + hubp->funcs->set_cursor_attributes(hubp, &hubp->curs_attr); + REG_UPDATE(CURSOR_CONTROL, CURSOR_ENABLE, cur_en); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index a7834dd507163eec24cf165862b7d006f0ce3441..33e91d9c010f8bb48f18fe67cfbdfc4b35575c81 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -127,113 +127,110 @@ SRI(CURSOR_HOT_SPOT, CURSOR, id), \ SRI(CURSOR_DST_OFFSET, CURSOR, id) - - -struct dcn_mi_registers { - uint32_t DCHUBP_CNTL; - uint32_t HUBPREQ_DEBUG_DB; - uint32_t DCSURF_ADDR_CONFIG; - uint32_t DCSURF_TILING_CONFIG; - uint32_t DCSURF_SURFACE_PITCH; - uint32_t DCSURF_SURFACE_PITCH_C; - uint32_t DCSURF_SURFACE_CONFIG; - uint32_t DCSURF_FLIP_CONTROL; - uint32_t DCSURF_PRI_VIEWPORT_DIMENSION; - uint32_t DCSURF_PRI_VIEWPORT_START; - uint32_t DCSURF_SEC_VIEWPORT_DIMENSION; - uint32_t DCSURF_SEC_VIEWPORT_START; - uint32_t DCSURF_PRI_VIEWPORT_DIMENSION_C; - uint32_t DCSURF_PRI_VIEWPORT_START_C; - uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; - uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS; - uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH; - uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS; - uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH; - uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS; - uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH; - uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS; - uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; - uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; - uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C; - uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C; - uint32_t DCSURF_SURFACE_INUSE; - uint32_t DCSURF_SURFACE_INUSE_HIGH; - uint32_t DCSURF_SURFACE_INUSE_C; - uint32_t DCSURF_SURFACE_INUSE_HIGH_C; - uint32_t DCSURF_SURFACE_EARLIEST_INUSE; - uint32_t DCSURF_SURFACE_EARLIEST_INUSE_HIGH; - uint32_t DCSURF_SURFACE_EARLIEST_INUSE_C; - uint32_t DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C; - uint32_t DCSURF_SURFACE_CONTROL; - uint32_t HUBPRET_CONTROL; - uint32_t DCN_EXPANSION_MODE; - uint32_t DCHUBP_REQ_SIZE_CONFIG; - uint32_t DCHUBP_REQ_SIZE_CONFIG_C; - uint32_t BLANK_OFFSET_0; - uint32_t BLANK_OFFSET_1; - uint32_t DST_DIMENSIONS; - uint32_t DST_AFTER_SCALER; - uint32_t PREFETCH_SETTINS; - uint32_t PREFETCH_SETTINGS; - uint32_t VBLANK_PARAMETERS_0; - uint32_t REF_FREQ_TO_PIX_FREQ; - uint32_t VBLANK_PARAMETERS_1; - uint32_t VBLANK_PARAMETERS_3; - uint32_t NOM_PARAMETERS_0; - uint32_t NOM_PARAMETERS_1; - uint32_t NOM_PARAMETERS_4; - uint32_t NOM_PARAMETERS_5; - uint32_t PER_LINE_DELIVERY_PRE; - uint32_t PER_LINE_DELIVERY; - uint32_t PREFETCH_SETTINS_C; - uint32_t PREFETCH_SETTINGS_C; - uint32_t VBLANK_PARAMETERS_2; - uint32_t VBLANK_PARAMETERS_4; - uint32_t NOM_PARAMETERS_2; - uint32_t NOM_PARAMETERS_3; - uint32_t NOM_PARAMETERS_6; - uint32_t NOM_PARAMETERS_7; - uint32_t DCN_TTU_QOS_WM; - uint32_t DCN_GLOBAL_TTU_CNTL; - uint32_t DCN_SURF0_TTU_CNTL0; - uint32_t DCN_SURF0_TTU_CNTL1; - uint32_t DCN_SURF1_TTU_CNTL0; - uint32_t DCN_SURF1_TTU_CNTL1; - uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_MSB; - uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LSB; - uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_START_ADDR_MSB; - uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LSB; - uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_END_ADDR_MSB; - uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LSB; - uint32_t DCN_VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR_MSB; - uint32_t DCN_VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR_LSB; - uint32_t DCN_VM_MX_L1_TLB_CNTL; - uint32_t DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB; - uint32_t DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB; - uint32_t DCN_VM_SYSTEM_APERTURE_LOW_ADDR_MSB; - uint32_t DCN_VM_SYSTEM_APERTURE_LOW_ADDR_LSB; - uint32_t DCN_VM_SYSTEM_APERTURE_HIGH_ADDR_MSB; - uint32_t DCN_VM_SYSTEM_APERTURE_HIGH_ADDR_LSB; - uint32_t DCN_VM_SYSTEM_APERTURE_LOW_ADDR; - uint32_t DCN_VM_SYSTEM_APERTURE_HIGH_ADDR; - uint32_t DCHUBBUB_SDPIF_FB_BASE; - uint32_t DCHUBBUB_SDPIF_FB_OFFSET; - uint32_t DCN_VM_FB_LOCATION_TOP; - uint32_t DCN_VM_FB_LOCATION_BASE; - uint32_t DCN_VM_FB_OFFSET; - uint32_t DCN_VM_AGP_BASE; - uint32_t DCN_VM_AGP_BOT; - uint32_t DCN_VM_AGP_TOP; - uint32_t CURSOR_SETTINS; - uint32_t CURSOR_SETTINGS; - uint32_t CURSOR_SURFACE_ADDRESS_HIGH; - uint32_t CURSOR_SURFACE_ADDRESS; - uint32_t CURSOR_SIZE; - uint32_t CURSOR_CONTROL; - uint32_t CURSOR_POSITION; - uint32_t CURSOR_HOT_SPOT; - uint32_t CURSOR_DST_OFFSET; -}; +#define HUBP_COMMON_REG_VARIABLE_LIST \ + uint32_t DCHUBP_CNTL; \ + uint32_t HUBPREQ_DEBUG_DB; \ + uint32_t DCSURF_ADDR_CONFIG; \ + uint32_t DCSURF_TILING_CONFIG; \ + uint32_t DCSURF_SURFACE_PITCH; \ + uint32_t DCSURF_SURFACE_PITCH_C; \ + uint32_t DCSURF_SURFACE_CONFIG; \ + uint32_t DCSURF_FLIP_CONTROL; \ + uint32_t DCSURF_PRI_VIEWPORT_DIMENSION; \ + uint32_t DCSURF_PRI_VIEWPORT_START; \ + uint32_t DCSURF_SEC_VIEWPORT_DIMENSION; \ + uint32_t DCSURF_SEC_VIEWPORT_START; \ + uint32_t DCSURF_PRI_VIEWPORT_DIMENSION_C; \ + uint32_t DCSURF_PRI_VIEWPORT_START_C; \ + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; \ + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS; \ + uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS_HIGH; \ + uint32_t DCSURF_SECONDARY_SURFACE_ADDRESS; \ + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH; \ + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS; \ + uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS_HIGH; \ + uint32_t DCSURF_SECONDARY_META_SURFACE_ADDRESS; \ + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; \ + uint32_t DCSURF_PRIMARY_SURFACE_ADDRESS_C; \ + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_HIGH_C; \ + uint32_t DCSURF_PRIMARY_META_SURFACE_ADDRESS_C; \ + uint32_t DCSURF_SURFACE_INUSE; \ + uint32_t DCSURF_SURFACE_INUSE_HIGH; \ + uint32_t DCSURF_SURFACE_INUSE_C; \ + uint32_t DCSURF_SURFACE_INUSE_HIGH_C; \ + uint32_t DCSURF_SURFACE_EARLIEST_INUSE; \ + uint32_t DCSURF_SURFACE_EARLIEST_INUSE_HIGH; \ + uint32_t DCSURF_SURFACE_EARLIEST_INUSE_C; \ + uint32_t DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C; \ + uint32_t DCSURF_SURFACE_CONTROL; \ + uint32_t HUBPRET_CONTROL; \ + uint32_t DCN_EXPANSION_MODE; \ + uint32_t DCHUBP_REQ_SIZE_CONFIG; \ + uint32_t DCHUBP_REQ_SIZE_CONFIG_C; \ + uint32_t BLANK_OFFSET_0; \ + uint32_t BLANK_OFFSET_1; \ + uint32_t DST_DIMENSIONS; \ + uint32_t DST_AFTER_SCALER; \ + uint32_t PREFETCH_SETTINS; \ + uint32_t PREFETCH_SETTINGS; \ + uint32_t VBLANK_PARAMETERS_0; \ + uint32_t REF_FREQ_TO_PIX_FREQ; \ + uint32_t VBLANK_PARAMETERS_1; \ + uint32_t VBLANK_PARAMETERS_3; \ + uint32_t NOM_PARAMETERS_0; \ + uint32_t NOM_PARAMETERS_1; \ + uint32_t NOM_PARAMETERS_4; \ + uint32_t NOM_PARAMETERS_5; \ + uint32_t PER_LINE_DELIVERY_PRE; \ + uint32_t PER_LINE_DELIVERY; \ + uint32_t PREFETCH_SETTINS_C; \ + uint32_t PREFETCH_SETTINGS_C; \ + uint32_t VBLANK_PARAMETERS_2; \ + uint32_t VBLANK_PARAMETERS_4; \ + uint32_t NOM_PARAMETERS_2; \ + uint32_t NOM_PARAMETERS_3; \ + uint32_t NOM_PARAMETERS_6; \ + uint32_t NOM_PARAMETERS_7; \ + uint32_t DCN_TTU_QOS_WM; \ + uint32_t DCN_GLOBAL_TTU_CNTL; \ + uint32_t DCN_SURF0_TTU_CNTL0; \ + uint32_t DCN_SURF0_TTU_CNTL1; \ + uint32_t DCN_SURF1_TTU_CNTL0; \ + uint32_t DCN_SURF1_TTU_CNTL1; \ + uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_MSB; \ + uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LSB; \ + uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_START_ADDR_MSB; \ + uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LSB; \ + uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_END_ADDR_MSB; \ + uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LSB; \ + uint32_t DCN_VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR_MSB; \ + uint32_t DCN_VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR_LSB; \ + uint32_t DCN_VM_MX_L1_TLB_CNTL; \ + uint32_t DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB; \ + uint32_t DCN_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB; \ + uint32_t DCN_VM_SYSTEM_APERTURE_LOW_ADDR_MSB; \ + uint32_t DCN_VM_SYSTEM_APERTURE_LOW_ADDR_LSB; \ + uint32_t DCN_VM_SYSTEM_APERTURE_HIGH_ADDR_MSB; \ + uint32_t DCN_VM_SYSTEM_APERTURE_HIGH_ADDR_LSB; \ + uint32_t DCN_VM_SYSTEM_APERTURE_LOW_ADDR; \ + uint32_t DCN_VM_SYSTEM_APERTURE_HIGH_ADDR; \ + uint32_t DCHUBBUB_SDPIF_FB_BASE; \ + uint32_t DCHUBBUB_SDPIF_FB_OFFSET; \ + uint32_t DCN_VM_FB_LOCATION_TOP; \ + uint32_t DCN_VM_FB_LOCATION_BASE; \ + uint32_t DCN_VM_FB_OFFSET; \ + uint32_t DCN_VM_AGP_BASE; \ + uint32_t DCN_VM_AGP_BOT; \ + uint32_t DCN_VM_AGP_TOP; \ + uint32_t CURSOR_SETTINS; \ + uint32_t CURSOR_SETTINGS; \ + uint32_t CURSOR_SURFACE_ADDRESS_HIGH; \ + uint32_t CURSOR_SURFACE_ADDRESS; \ + uint32_t CURSOR_SIZE; \ + uint32_t CURSOR_CONTROL; \ + uint32_t CURSOR_POSITION; \ + uint32_t CURSOR_HOT_SPOT; \ + uint32_t CURSOR_DST_OFFSET #define HUBP_SF(reg_name, field_name, post_fix)\ .field_name = reg_name ## __ ## field_name ## post_fix @@ -397,7 +394,6 @@ struct dcn_mi_registers { HUBP_SF(CURSOR0_CURSOR_HOT_SPOT, CURSOR_HOT_SPOT_Y, mask_sh), \ HUBP_SF(CURSOR0_CURSOR_DST_OFFSET, CURSOR_DST_X_OFFSET, mask_sh) - #define DCN_HUBP_REG_FIELD_LIST(type) \ type HUBP_BLANK_EN;\ type HUBP_TTU_DISABLE;\ @@ -577,6 +573,10 @@ struct dcn_mi_registers { type CURSOR_DST_X_OFFSET; \ type OUTPUT_FP +struct dcn_mi_registers { + HUBP_COMMON_REG_VARIABLE_LIST; +}; + struct dcn_mi_shift { DCN_HUBP_REG_FIELD_LIST(uint8_t); }; @@ -611,11 +611,11 @@ void hubp1_program_requestor( struct _vcs_dpi_display_rq_regs_st *rq_regs); void hubp1_program_pixel_format( - struct dcn10_hubp *hubp, + struct hubp *hubp, enum surface_pixel_format format); void hubp1_program_size_and_rotation( - struct dcn10_hubp *hubp, + struct hubp *hubp, enum dc_rotation_angle rotation, enum surface_pixel_format format, const union plane_size *plane_size, @@ -623,7 +623,7 @@ void hubp1_program_size_and_rotation( bool horizontal_mirror); void hubp1_program_tiling( - struct dcn10_hubp *hubp, + struct hubp *hubp, const union dc_tiling_info *info, const enum surface_pixel_format pixel_format); @@ -681,4 +681,6 @@ struct dcn_hubp_state { void hubp1_read_state(struct dcn10_hubp *hubp1, struct dcn_hubp_state *s); +enum cursor_pitch hubp1_get_cursor_pitch(unsigned int pitch); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 8e2ddbc2129cd3ead334f672d7cf6afb15e7711b..82572863acab747759e3d8c2787a1e2cffc1f8df 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -32,7 +32,7 @@ #include "dce/dce_hwseq.h" #include "abm.h" #include "dmcu.h" -#include "dcn10/dcn10_timing_generator.h" +#include "dcn10_optc.h" #include "dcn10/dcn10_dpp.h" #include "dcn10/dcn10_mpc.h" #include "timing_generator.h" @@ -43,6 +43,7 @@ #include "custom_float.h" #include "dcn10_hubp.h" #include "dcn10_hubbub.h" +#include "dcn10_cm_common.h" #define CTX \ hws->ctx @@ -158,7 +159,7 @@ void dcn10_log_hw_state(struct dc *dc) struct timing_generator *tg = pool->timing_generators[i]; struct dcn_otg_state s = {0}; - tgn10_read_otg_state(DCN10TG_FROM_TG(tg), &s); + optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s); //only print if OTG master is enabled if ((s.otg_enabled & 1) == 0) @@ -425,6 +426,34 @@ static void bios_golden_init(struct dc *dc) } } +static void false_optc_underflow_wa( + struct dc *dc, + const struct dc_stream_state *stream, + struct timing_generator *tg) +{ + int i; + bool underflow; + + if (!dc->hwseq->wa.false_optc_underflow) + return; + + underflow = tg->funcs->is_optc_underflow_occurred(tg); + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (old_pipe_ctx->stream != stream) + continue; + + dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, old_pipe_ctx); + } + + tg->funcs->set_blank_data_double_buffer(tg, true); + + if (tg->funcs->is_optc_underflow_occurred(tg) && !underflow) + tg->funcs->clear_optc_underflow(tg); +} + static enum dc_status dcn10_prog_pixclk_crtc_otg( struct pipe_ctx *pipe_ctx, struct dc_state *context, @@ -433,9 +462,6 @@ static enum dc_status dcn10_prog_pixclk_crtc_otg( struct dc_stream_state *stream = pipe_ctx->stream; enum dc_color_space color_space; struct tg_color black_color = {0}; - bool enableStereo = stream->timing.timing_3d_format == TIMING_3D_FORMAT_NONE ? - false:true; - bool rightEyePolarity = stream->timing.flags.RIGHT_EYE_3D_POLARITY; /* by upper caller loop, pipe0 is parent pipe and be called first. * back end is set up by for pipe0. Other children pipe share back end @@ -470,11 +496,6 @@ static enum dc_status dcn10_prog_pixclk_crtc_otg( &stream->timing, true); - pipe_ctx->stream_res.opp->funcs->opp_set_stereo_polarity( - pipe_ctx->stream_res.opp, - enableStereo, - rightEyePolarity); - #if 0 /* move to after enable_crtc */ /* TODO: OPP FMT, ABM. etc. should be done here. */ /* or FPGA now. instance 0 only. TODO: move to opp.c */ @@ -489,12 +510,18 @@ static enum dc_status dcn10_prog_pixclk_crtc_otg( /* program otg blank color */ color_space = stream->output_color_space; color_space_to_black_color(dc, color_space, &black_color); - pipe_ctx->stream_res.tg->funcs->set_blank_color( - pipe_ctx->stream_res.tg, - &black_color); - pipe_ctx->stream_res.tg->funcs->set_blank(pipe_ctx->stream_res.tg, true); - hwss_wait_for_blank_complete(pipe_ctx->stream_res.tg); + if (pipe_ctx->stream_res.tg->funcs->set_blank_color) + pipe_ctx->stream_res.tg->funcs->set_blank_color( + pipe_ctx->stream_res.tg, + &black_color); + + if (pipe_ctx->stream_res.tg->funcs->is_blanked && + !pipe_ctx->stream_res.tg->funcs->is_blanked(pipe_ctx->stream_res.tg)) { + pipe_ctx->stream_res.tg->funcs->set_blank(pipe_ctx->stream_res.tg, true); + hwss_wait_for_blank_complete(pipe_ctx->stream_res.tg); + false_optc_underflow_wa(dc, pipe_ctx->stream, pipe_ctx->stream_res.tg); + } /* VTG is within DCHUB command block. DCFCLK is always on */ if (false == pipe_ctx->stream_res.tg->funcs->enable_crtc(pipe_ctx->stream_res.tg)) { @@ -573,41 +600,34 @@ static void plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx) int fe_idx = pipe_ctx->pipe_idx; struct hubp *hubp = dc->res_pool->hubps[fe_idx]; struct mpc *mpc = dc->res_pool->mpc; - int opp_id, z_idx; - int mpcc_id = -1; + int opp_id; + struct mpc_tree *mpc_tree_params; + struct mpcc *mpcc_to_remove = NULL; /* look at tree rather than mi here to know if we already reset */ for (opp_id = 0; opp_id < dc->res_pool->pipe_count; opp_id++) { struct output_pixel_processor *opp = dc->res_pool->opps[opp_id]; - for (z_idx = 0; z_idx < opp->mpc_tree.num_pipes; z_idx++) { - if (opp->mpc_tree.dpp[z_idx] == fe_idx) { - mpcc_id = opp->mpc_tree.mpcc[z_idx]; - break; - } - } - if (mpcc_id != -1) + mpc_tree_params = &(opp->mpc_tree_params); + mpcc_to_remove = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, fe_idx); + if (mpcc_to_remove != NULL) break; } + /*Already reset*/ if (opp_id == dc->res_pool->pipe_count) return; - mpc->funcs->remove(mpc, &(dc->res_pool->opps[opp_id]->mpc_tree), - dc->res_pool->opps[opp_id]->inst, fe_idx); + mpc->funcs->remove_mpcc(mpc, mpc_tree_params, mpcc_to_remove); + dc->res_pool->opps[opp_id]->mpcc_disconnect_pending[fe_idx] = true; + + dc->optimized_required = true; if (hubp->funcs->hubp_disconnect) hubp->funcs->hubp_disconnect(hubp); if (dc->debug.sanity_checks) dcn10_verify_allow_pstate_change_high(dc); - - pipe_ctx->stream = NULL; - memset(&pipe_ctx->stream_res, 0, sizeof(pipe_ctx->stream_res)); - memset(&pipe_ctx->plane_res, 0, sizeof(pipe_ctx->plane_res)); - pipe_ctx->top_pipe = NULL; - pipe_ctx->bottom_pipe = NULL; - pipe_ctx->plane_state = NULL; } static void plane_atomic_power_down(struct dc *dc, int fe_idx) @@ -636,29 +656,30 @@ static void plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) int fe_idx = pipe_ctx->pipe_idx; struct dce_hwseq *hws = dc->hwseq; struct hubp *hubp = dc->res_pool->hubps[fe_idx]; - struct mpc *mpc = dc->res_pool->mpc; int opp_id = hubp->opp_id; - struct output_pixel_processor *opp; - if (opp_id != 0xf) { - mpc->funcs->wait_for_idle(mpc, hubp->mpcc_id); - opp = dc->res_pool->opps[hubp->opp_id]; - opp->mpcc_disconnect_pending[hubp->mpcc_id] = false; - hubp->funcs->set_blank(hubp, true); - } + dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe_ctx); REG_UPDATE(HUBP_CLK_CNTL[fe_idx], HUBP_CLOCK_ENABLE, 0); REG_UPDATE(DPP_CONTROL[fe_idx], DPP_CLOCK_ENABLE, 0); - if (opp_id != 0xf && dc->res_pool->opps[opp_id]->mpc_tree.num_pipes == 0) + if (opp_id != 0xf && dc->res_pool->opps[opp_id]->mpc_tree_params.opp_list == NULL) REG_UPDATE(OPP_PIPE_CONTROL[opp_id], OPP_PIPE_CLOCK_EN, 0); hubp->power_gated = true; + dc->optimized_required = false; /* We're powering off, no need to optimize */ plane_atomic_power_down(dc, fe_idx); + + pipe_ctx->stream = NULL; + memset(&pipe_ctx->stream_res, 0, sizeof(pipe_ctx->stream_res)); + memset(&pipe_ctx->plane_res, 0, sizeof(pipe_ctx->plane_res)); + pipe_ctx->top_pipe = NULL; + pipe_ctx->bottom_pipe = NULL; + pipe_ctx->plane_state = NULL; } static void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) @@ -740,25 +761,27 @@ static void dcn10_init_hw(struct dc *dc) } } + /* Reset all MPCC muxes */ + dc->res_pool->mpc->funcs->mpc_init(dc->res_pool->mpc); + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct timing_generator *tg = dc->res_pool->timing_generators[i]; struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - struct output_pixel_processor *opp = dc->res_pool->opps[i]; - struct mpc_tree_cfg *mpc_tree = &opp->mpc_tree; struct hubp *hubp = dc->res_pool->hubps[i]; - mpc_tree->dpp[0] = i; - mpc_tree->mpcc[0] = i; - mpc_tree->num_pipes = 1; - pipe_ctx->stream_res.tg = tg; pipe_ctx->pipe_idx = i; pipe_ctx->plane_res.hubp = hubp; hubp->mpcc_id = i; - hubp->opp_id = dc->res_pool->mpc->funcs->get_opp_id(dc->res_pool->mpc, i); + hubp->opp_id = 0xf; hubp->power_gated = false; + dc->res_pool->opps[i]->mpc_tree_params.opp_id = dc->res_pool->opps[i]->inst; + dc->res_pool->opps[i]->mpc_tree_params.opp_list = NULL; + dc->res_pool->opps[i]->mpcc_disconnect_pending[i] = true; + pipe_ctx->stream_res.opp = dc->res_pool->opps[i]; + plane_atomic_disconnect(dc, pipe_ctx); } @@ -929,280 +952,10 @@ static bool dcn10_set_input_transfer_func(struct pipe_ctx *pipe_ctx, return result; } -/*modify the method to handle rgb for arr_points*/ -static bool convert_to_custom_float( - struct pwl_result_data *rgb_resulted, - struct curve_points *arr_points, - uint32_t hw_points_num) -{ - struct custom_float_format fmt; - - struct pwl_result_data *rgb = rgb_resulted; - - uint32_t i = 0; - - fmt.exponenta_bits = 6; - fmt.mantissa_bits = 12; - fmt.sign = false; - - if (!convert_to_custom_float_format(arr_points[0].x, &fmt, - &arr_points[0].custom_float_x)) { - BREAK_TO_DEBUGGER(); - return false; - } - - if (!convert_to_custom_float_format(arr_points[0].offset, &fmt, - &arr_points[0].custom_float_offset)) { - BREAK_TO_DEBUGGER(); - return false; - } - - if (!convert_to_custom_float_format(arr_points[0].slope, &fmt, - &arr_points[0].custom_float_slope)) { - BREAK_TO_DEBUGGER(); - return false; - } - - fmt.mantissa_bits = 10; - fmt.sign = false; - - if (!convert_to_custom_float_format(arr_points[1].x, &fmt, - &arr_points[1].custom_float_x)) { - BREAK_TO_DEBUGGER(); - return false; - } - - if (!convert_to_custom_float_format(arr_points[1].y, &fmt, - &arr_points[1].custom_float_y)) { - BREAK_TO_DEBUGGER(); - return false; - } - - if (!convert_to_custom_float_format(arr_points[1].slope, &fmt, - &arr_points[1].custom_float_slope)) { - BREAK_TO_DEBUGGER(); - return false; - } - fmt.mantissa_bits = 12; - fmt.sign = true; - while (i != hw_points_num) { - if (!convert_to_custom_float_format(rgb->red, &fmt, - &rgb->red_reg)) { - BREAK_TO_DEBUGGER(); - return false; - } - - if (!convert_to_custom_float_format(rgb->green, &fmt, - &rgb->green_reg)) { - BREAK_TO_DEBUGGER(); - return false; - } - if (!convert_to_custom_float_format(rgb->blue, &fmt, - &rgb->blue_reg)) { - BREAK_TO_DEBUGGER(); - return false; - } - if (!convert_to_custom_float_format(rgb->delta_red, &fmt, - &rgb->delta_red_reg)) { - BREAK_TO_DEBUGGER(); - return false; - } - - if (!convert_to_custom_float_format(rgb->delta_green, &fmt, - &rgb->delta_green_reg)) { - BREAK_TO_DEBUGGER(); - return false; - } - - if (!convert_to_custom_float_format(rgb->delta_blue, &fmt, - &rgb->delta_blue_reg)) { - BREAK_TO_DEBUGGER(); - return false; - } - - ++rgb; - ++i; - } - - return true; -} -#define MAX_REGIONS_NUMBER 34 -#define MAX_LOW_POINT 25 -#define NUMBER_SEGMENTS 32 - -static bool -dcn10_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, - struct pwl_params *regamma_params) -{ - struct curve_points *arr_points; - struct pwl_result_data *rgb_resulted; - struct pwl_result_data *rgb; - struct pwl_result_data *rgb_plus_1; - struct fixed31_32 y_r; - struct fixed31_32 y_g; - struct fixed31_32 y_b; - struct fixed31_32 y1_min; - struct fixed31_32 y3_max; - - int32_t segment_start, segment_end; - int32_t i; - uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points; - - if (output_tf == NULL || regamma_params == NULL || output_tf->type == TF_TYPE_BYPASS) - return false; - - PERF_TRACE(); - - arr_points = regamma_params->arr_points; - rgb_resulted = regamma_params->rgb_resulted; - hw_points = 0; - - memset(regamma_params, 0, sizeof(struct pwl_params)); - memset(seg_distr, 0, sizeof(seg_distr)); - - if (output_tf->tf == TRANSFER_FUNCTION_PQ) { - /* 32 segments - * segments are from 2^-25 to 2^7 - */ - for (i = 0; i < 32 ; i++) - seg_distr[i] = 3; - - segment_start = -25; - segment_end = 7; - } else { - /* 10 segments - * segment is from 2^-10 to 2^0 - * There are less than 256 points, for optimization - */ - seg_distr[0] = 3; - seg_distr[1] = 4; - seg_distr[2] = 4; - seg_distr[3] = 4; - seg_distr[4] = 4; - seg_distr[5] = 4; - seg_distr[6] = 4; - seg_distr[7] = 4; - seg_distr[8] = 5; - seg_distr[9] = 5; - - segment_start = -10; - segment_end = 0; - } - - for (i = segment_end - segment_start; i < MAX_REGIONS_NUMBER ; i++) - seg_distr[i] = -1; - - for (k = 0; k < MAX_REGIONS_NUMBER; k++) { - if (seg_distr[k] != -1) - hw_points += (1 << seg_distr[k]); - } - - j = 0; - for (k = 0; k < (segment_end - segment_start); k++) { - increment = NUMBER_SEGMENTS / (1 << seg_distr[k]); - start_index = (segment_start + k + MAX_LOW_POINT) * NUMBER_SEGMENTS; - for (i = start_index; i < start_index + NUMBER_SEGMENTS; i += increment) { - if (j == hw_points - 1) - break; - rgb_resulted[j].red = output_tf->tf_pts.red[i]; - rgb_resulted[j].green = output_tf->tf_pts.green[i]; - rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; - j++; - } - } - - /* last point */ - start_index = (segment_end + MAX_LOW_POINT) * NUMBER_SEGMENTS; - rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index]; - rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; - rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; - - arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), - dal_fixed31_32_from_int(segment_start)); - arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2), - dal_fixed31_32_from_int(segment_end)); - - y_r = rgb_resulted[0].red; - y_g = rgb_resulted[0].green; - y_b = rgb_resulted[0].blue; - - y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b)); - - arr_points[0].y = y1_min; - arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y, arr_points[0].x); - y_r = rgb_resulted[hw_points - 1].red; - y_g = rgb_resulted[hw_points - 1].green; - y_b = rgb_resulted[hw_points - 1].blue; - - /* see comment above, m_arrPoints[1].y should be the Y value for the - * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) - */ - y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b)); - - arr_points[1].y = y3_max; - - arr_points[1].slope = dal_fixed31_32_zero; - - if (output_tf->tf == TRANSFER_FUNCTION_PQ) { - /* for PQ, we want to have a straight line from last HW X point, - * and the slope to be such that we hit 1.0 at 10000 nits. - */ - const struct fixed31_32 end_value = - dal_fixed31_32_from_int(125); - - arr_points[1].slope = dal_fixed31_32_div( - dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y), - dal_fixed31_32_sub(end_value, arr_points[1].x)); - } - - regamma_params->hw_points_num = hw_points; - - i = 1; - for (k = 0; k < MAX_REGIONS_NUMBER && i < MAX_REGIONS_NUMBER; k++) { - if (seg_distr[k] != -1) { - regamma_params->arr_curve_points[k].segments_num = - seg_distr[k]; - regamma_params->arr_curve_points[i].offset = - regamma_params->arr_curve_points[k].offset + (1 << seg_distr[k]); - } - i++; - } - - if (seg_distr[k] != -1) - regamma_params->arr_curve_points[k].segments_num = seg_distr[k]; - - rgb = rgb_resulted; - rgb_plus_1 = rgb_resulted + 1; - - i = 1; - - while (i != hw_points + 1) { - if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red)) - rgb_plus_1->red = rgb->red; - if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green)) - rgb_plus_1->green = rgb->green; - if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue)) - rgb_plus_1->blue = rgb->blue; - - rgb->delta_red = dal_fixed31_32_sub(rgb_plus_1->red, rgb->red); - rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green); - rgb->delta_blue = dal_fixed31_32_sub(rgb_plus_1->blue, rgb->blue); - - ++rgb_plus_1; - ++rgb; - ++i; - } - - convert_to_custom_float(rgb_resulted, arr_points, hw_points); - - PERF_TRACE(); - - return true; -} static bool dcn10_set_output_transfer_func(struct pipe_ctx *pipe_ctx, @@ -1223,9 +976,9 @@ dcn10_set_output_transfer_func(struct pipe_ctx *pipe_ctx, /* dcn10_translate_regamma_to_hw_format takes 750us, only do it when full * update. */ - else if (dcn10_translate_regamma_to_hw_format( + else if (cm_helper_translate_curve_to_hw_format( stream->out_transfer_func, - &dpp->regamma_params)) { + &dpp->regamma_params, false)) { dpp->funcs->dpp_program_regamma_pwl( dpp, &dpp->regamma_params, OPP_REGAMMA_USER); @@ -1579,7 +1332,6 @@ static void dcn10_enable_plane( /* make sure OPP_PIPE_CLOCK_EN = 1 */ REG_UPDATE(OPP_PIPE_CONTROL[pipe_ctx->stream_res.tg->inst], OPP_PIPE_CLOCK_EN, 1); - /*TODO: REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_WDIVIDER, 0x1f);*/ /* TODO: enable/disable in dm as per update type. if (plane_state) { @@ -1672,60 +1424,15 @@ static void program_csc_matrix(struct pipe_ctx *pipe_ctx, enum dc_color_space colorspace, uint16_t *matrix) { - int i; - struct out_csc_color_matrix tbl_entry; - if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) { - enum dc_color_space color_space = - pipe_ctx->stream->output_color_space; - - //uint16_t matrix[12]; - for (i = 0; i < 12; i++) - tbl_entry.regval[i] = pipe_ctx->stream->csc_color_matrix.matrix[i]; - - tbl_entry.color_space = color_space; - //tbl_entry.regval = matrix; - if (pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment != NULL) - pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment(pipe_ctx->plane_res.dpp, &tbl_entry); + pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment(pipe_ctx->plane_res.dpp, matrix); } else { if (pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_default != NULL) pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_default(pipe_ctx->plane_res.dpp, colorspace); } } -static void set_mpc_output_csc(struct dc *dc, - struct pipe_ctx *pipe_ctx, - enum dc_color_space colorspace, - uint16_t *matrix, - int opp_id) -{ - struct mpc *mpc = dc->res_pool->mpc; - int i; - struct out_csc_color_matrix tbl_entry; - enum mpc_output_csc_mode ocsc_mode = MPC_OUTPUT_CSC_COEF_A; - - - if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) { - //uint16_t matrix[12]; - for (i = 0; i < 12; i++) - tbl_entry.regval[i] = matrix[i]; - tbl_entry.color_space = colorspace; - - if (mpc->funcs->set_output_csc != NULL) - mpc->funcs->set_output_csc(mpc, - opp_id, - &tbl_entry, - ocsc_mode); - } else { - if (mpc->funcs->set_ocsc_default != NULL) - mpc->funcs->set_ocsc_default(mpc, - opp_id, - colorspace, - ocsc_mode); - } -} - static void program_output_csc(struct dc *dc, struct pipe_ctx *pipe_ctx, enum dc_color_space colorspace, @@ -1736,13 +1443,6 @@ static void program_output_csc(struct dc *dc, program_csc_matrix(pipe_ctx, colorspace, matrix); - else - set_mpc_output_csc(dc, - pipe_ctx, - colorspace, - matrix, - opp_id); - } static bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx) @@ -1914,35 +1614,73 @@ static void update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state) static void update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) { - struct mpcc_cfg mpcc_cfg = {0}; struct hubp *hubp = pipe_ctx->plane_res.hubp; - struct pipe_ctx *top_pipe; - bool per_pixel_alpha = - pipe_ctx->plane_state->per_pixel_alpha && pipe_ctx->bottom_pipe; + struct mpcc_blnd_cfg blnd_cfg; + bool per_pixel_alpha = pipe_ctx->plane_state->per_pixel_alpha && pipe_ctx->bottom_pipe; + int mpcc_id; + struct mpcc *new_mpcc; + struct mpc *mpc = dc->res_pool->mpc; + struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); /* TODO: proper fix once fpga works */ - mpcc_cfg.dpp_id = hubp->inst; - mpcc_cfg.opp_id = pipe_ctx->stream_res.opp->inst; - mpcc_cfg.tree_cfg = &(pipe_ctx->stream_res.opp->mpc_tree); - for (top_pipe = pipe_ctx->top_pipe; top_pipe; top_pipe = top_pipe->top_pipe) - mpcc_cfg.z_index++; if (dc->debug.surface_visual_confirm) dcn10_get_surface_visual_confirm_color( - pipe_ctx, &mpcc_cfg.black_color); + pipe_ctx, &blnd_cfg.black_color); else color_space_to_black_color( dc, pipe_ctx->stream->output_color_space, - &mpcc_cfg.black_color); - mpcc_cfg.per_pixel_alpha = per_pixel_alpha; + &blnd_cfg.black_color); + + if (per_pixel_alpha) + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + else + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + + blnd_cfg.overlap_only = false; + blnd_cfg.global_alpha = 0xff; + blnd_cfg.global_gain = 0xff; + /* DCN1.0 has output CM before MPC which seems to screw with * pre-multiplied alpha. */ - mpcc_cfg.pre_multiplied_alpha = is_rgb_cspace( + blnd_cfg.pre_multiplied_alpha = is_rgb_cspace( pipe_ctx->stream->output_color_space) && per_pixel_alpha; - hubp->mpcc_id = dc->res_pool->mpc->funcs->add(dc->res_pool->mpc, &mpcc_cfg); - hubp->opp_id = mpcc_cfg.opp_id; + + /* + * TODO: remove hack + * Note: currently there is a bug in init_hw such that + * on resume from hibernate, BIOS sets up MPCC0, and + * we do mpcc_remove but the mpcc cannot go to idle + * after remove. This cause us to pick mpcc1 here, + * which causes a pstate hang for yet unknown reason. + */ + mpcc_id = hubp->inst; + + /* check if this MPCC is already being used */ + new_mpcc = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, mpcc_id); + /* remove MPCC if being used */ + if (new_mpcc != NULL) + mpc->funcs->remove_mpcc(mpc, mpc_tree_params, new_mpcc); + else + if (dc->debug.sanity_checks) + mpc->funcs->assert_mpcc_idle_before_connect( + dc->res_pool->mpc, mpcc_id); + + /* Call MPC to insert new plane */ + new_mpcc = mpc->funcs->insert_plane(dc->res_pool->mpc, + mpc_tree_params, + &blnd_cfg, + NULL, + NULL, + hubp->inst, + mpcc_id); + + ASSERT(new_mpcc != NULL); + + hubp->opp_id = pipe_ctx->stream_res.opp->inst; + hubp->mpcc_id = mpcc_id; } static void update_scaler(struct pipe_ctx *pipe_ctx) @@ -1971,7 +1709,7 @@ static void update_dchubp_dpp( union plane_size size = plane_state->plane_size; /* depends on DML calculation, DPP clock value may change dynamically */ - if (pipe_ctx->plane_state->update_flags.raw != 0) { + if (plane_state->update_flags.bits.full_update) { enable_dppclk( dc->hwseq, pipe_ctx->pipe_idx, @@ -2015,7 +1753,8 @@ static void update_dchubp_dpp( } if (plane_state->update_flags.bits.full_update || - plane_state->update_flags.bits.scaling_change) { + plane_state->update_flags.bits.scaling_change || + plane_state->update_flags.bits.position_change) { hubp->funcs->mem_program_viewport( hubp, &pipe_ctx->plane_res.scl_data.viewport, @@ -2037,7 +1776,9 @@ static void update_dchubp_dpp( plane_state->update_flags.bits.horizontal_mirror_change || plane_state->update_flags.bits.rotation_change || plane_state->update_flags.bits.swizzle_change || - plane_state->update_flags.bits.bpp_change) { + plane_state->update_flags.bits.dcc_change || + plane_state->update_flags.bits.bpp_change || + plane_state->update_flags.bits.scaling_change) { hubp->funcs->hubp_program_surface_config( hubp, plane_state->format, @@ -2062,6 +1803,7 @@ static void program_all_pipe_in_tree( struct pipe_ctx *pipe_ctx, struct dc_state *context) { + if (pipe_ctx->top_pipe == NULL) { pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset; @@ -2072,7 +1814,11 @@ static void program_all_pipe_in_tree( pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg); - pipe_ctx->stream_res.tg->funcs->set_blank(pipe_ctx->stream_res.tg, !is_pipe_tree_visible(pipe_ctx)); + + if (pipe_ctx->stream_res.tg->funcs->set_blank) + pipe_ctx->stream_res.tg->funcs->set_blank( + pipe_ctx->stream_res.tg, + !is_pipe_tree_visible(pipe_ctx)); } if (pipe_ctx->plane_state != NULL) { @@ -2179,6 +1925,7 @@ static void dcn10_apply_ctx_for_surface( { int i; struct timing_generator *tg; + struct output_pixel_processor *opp; bool removed_pipe[4] = { false }; unsigned int ref_clk_mhz = dc->res_pool->ref_clock_inKhz/1000; bool program_water_mark = false; @@ -2189,6 +1936,8 @@ static void dcn10_apply_ctx_for_surface( if (!top_pipe_to_program) return; + opp = top_pipe_to_program->stream_res.opp; + tg = top_pipe_to_program->stream_res.tg; tg->funcs->lock(tg); @@ -2196,7 +1945,8 @@ static void dcn10_apply_ctx_for_surface( if (num_planes == 0) { /* OTG blank before remove all front end */ - tg->funcs->set_blank(tg, true); + if (tg->funcs->set_blank) + tg->funcs->set_blank(tg, true); } /* Disconnect unused mpcc */ @@ -2236,24 +1986,14 @@ static void dcn10_apply_ctx_for_surface( } } - if (num_planes > 0) { + if (num_planes > 0) program_all_pipe_in_tree(dc, top_pipe_to_program, context); - /* TODO: this is a hack w/a for switching from mpo to pipe split */ - if (stream->cursor_attributes.address.quad_part != 0) { - struct dc_cursor_position position = { 0 }; - - dc_stream_set_cursor_position( - (struct dc_stream_state *)stream, - &position); - dc_stream_set_cursor_attributes( - (struct dc_stream_state *)stream, - &stream->cursor_attributes); - } - } - tg->funcs->unlock(tg); + if (num_planes == 0) + false_optc_underflow_wa(dc, stream, tg); + for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; @@ -2264,7 +2004,7 @@ static void dcn10_apply_ctx_for_surface( pipe_ctx->plane_state->update_flags.bits.full_update) program_water_mark = true; - if (removed_pipe[i] && num_planes == 0) + if (removed_pipe[i]) dcn10_disable_plane(dc, old_pipe_ctx); } @@ -2273,6 +2013,7 @@ static void dcn10_apply_ctx_for_surface( /* pstate stuck check after watermark update */ dcn10_verify_allow_pstate_change_high(dc); } + /* watermark is for all pipes */ hubbub1_program_watermarks(dc->res_pool->hubbub, &context->bw.dcn.watermarks, ref_clk_mhz); @@ -2502,10 +2243,10 @@ static void dcn10_setup_stereo(struct pipe_ctx *pipe_ctx, struct dc *dc) dcn10_config_stereo_parameters(stream, &flags); - pipe_ctx->stream_res.opp->funcs->opp_set_stereo_polarity( + pipe_ctx->stream_res.opp->funcs->opp_program_stereo( pipe_ctx->stream_res.opp, flags.PROGRAM_STEREO == 1 ? true:false, - stream->timing.flags.RIGHT_EYE_3D_POLARITY == 1 ? true:false); + &stream->timing); pipe_ctx->stream_res.tg->funcs->program_stereo( pipe_ctx->stream_res.tg, @@ -2619,7 +2360,8 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .pplib_apply_display_requirements = dcn10_pplib_apply_display_requirements, .edp_backlight_control = hwss_edp_backlight_control, - .edp_power_control = hwss_edp_power_control + .edp_power_control = hwss_edp_power_control, + .edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready, }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index b016f4cbd45c74c5931ad9693d8cd3b5a3da69ce..179890b1a8c4e5029501a0caf42ed8acad1c4099 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -25,8 +25,6 @@ #include "reg_helper.h" #include "dcn10_mpc.h" -#include "dc.h" -#include "mem_input.h" #define REG(reg)\ mpc10->mpc_regs->reg @@ -38,17 +36,13 @@ #define FN(reg_name, field_name) \ mpc10->mpc_shift->field_name, mpc10->mpc_mask->field_name -#define MODE_TOP_ONLY 1 -#define MODE_BLEND 3 -#define BLND_PP_ALPHA 0 -#define BLND_GLOBAL_ALPHA 2 - -static void mpc10_set_bg_color( - struct dcn10_mpc *mpc10, +void mpc1_set_bg_color(struct mpc *mpc, struct tg_color *bg_color, - int id) + int mpcc_id) { + struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); + /* mpc color is 12 bit. tg_color is 10 bit */ /* todo: might want to use 16 bit to represent color and have each * hw block translate to correct color depth. @@ -57,15 +51,47 @@ static void mpc10_set_bg_color( uint32_t bg_g_y = bg_color->color_g_y << 2; uint32_t bg_b_cb = bg_color->color_b_cb << 2; - REG_SET(MPCC_BG_R_CR[id], 0, + REG_SET(MPCC_BG_R_CR[mpcc_id], 0, MPCC_BG_R_CR, bg_r_cr); - REG_SET(MPCC_BG_G_Y[id], 0, + REG_SET(MPCC_BG_G_Y[mpcc_id], 0, MPCC_BG_G_Y, bg_g_y); - REG_SET(MPCC_BG_B_CB[id], 0, + REG_SET(MPCC_BG_B_CB[mpcc_id], 0, MPCC_BG_B_CB, bg_b_cb); } -void mpc10_assert_idle_mpcc(struct mpc *mpc, int id) +static void mpc1_update_blending( + struct mpc *mpc, + struct mpcc_blnd_cfg *blnd_cfg, + int mpcc_id) +{ + struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); + + REG_UPDATE_5(MPCC_CONTROL[mpcc_id], + MPCC_ALPHA_BLND_MODE, blnd_cfg->alpha_mode, + MPCC_ALPHA_MULTIPLIED_MODE, blnd_cfg->pre_multiplied_alpha, + MPCC_BLND_ACTIVE_OVERLAP_ONLY, blnd_cfg->overlap_only, + MPCC_GLOBAL_ALPHA, blnd_cfg->global_alpha, + MPCC_GLOBAL_GAIN, blnd_cfg->global_gain); + + mpc1_set_bg_color(mpc, &blnd_cfg->black_color, mpcc_id); +} + +void mpc1_update_stereo_mix( + struct mpc *mpc, + struct mpcc_sm_cfg *sm_cfg, + int mpcc_id) +{ + struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); + + REG_UPDATE_6(MPCC_SM_CONTROL[mpcc_id], + MPCC_SM_EN, sm_cfg->enable, + MPCC_SM_MODE, sm_cfg->sm_mode, + MPCC_SM_FRAME_ALT, sm_cfg->frame_alt, + MPCC_SM_FIELD_ALT, sm_cfg->field_alt, + MPCC_SM_FORCE_NEXT_FRAME_POL, sm_cfg->force_next_frame_porlarity, + MPCC_SM_FORCE_NEXT_TOP_POL, sm_cfg->force_next_field_polarity); +} +void mpc1_assert_idle_mpcc(struct mpc *mpc, int id) { struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); @@ -75,39 +101,52 @@ void mpc10_assert_idle_mpcc(struct mpc *mpc, int id) 1, 100000); } -static int mpc10_get_idle_mpcc_id(struct dcn10_mpc *mpc10) +struct mpcc *mpc1_get_mpcc(struct mpc *mpc, int mpcc_id) { - int i; - int last_free_mpcc_id = -1; + struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); - for (i = 0; i < mpc10->num_mpcc; i++) { - uint32_t is_idle = 0; + ASSERT(mpcc_id < mpc10->num_mpcc); + return &(mpc->mpcc_array[mpcc_id]); +} - if (mpc10->mpcc_in_use_mask & 1 << i) - continue; +struct mpcc *mpc1_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) +{ + struct mpcc *tmp_mpcc = tree->opp_list; - last_free_mpcc_id = i; - REG_GET(MPCC_STATUS[i], MPCC_IDLE, &is_idle); - if (is_idle) - return i; + while (tmp_mpcc != NULL) { + if (tmp_mpcc->dpp_id == dpp_id) + return tmp_mpcc; + tmp_mpcc = tmp_mpcc->mpcc_bot; } + return NULL; +} - /* This assert should never trigger, we have mpcc leak if it does */ - ASSERT(last_free_mpcc_id != -1); - - mpc10_assert_idle_mpcc(&mpc10->base, last_free_mpcc_id); - return last_free_mpcc_id; +bool mpc1_is_mpcc_idle(struct mpc *mpc, int mpcc_id) +{ + struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); + unsigned int top_sel; + unsigned int opp_id; + unsigned int idle; + + REG_GET(MPCC_TOP_SEL[mpcc_id], MPCC_TOP_SEL, &top_sel); + REG_GET(MPCC_OPP_ID[mpcc_id], MPCC_OPP_ID, &opp_id); + REG_GET(MPCC_STATUS[mpcc_id], MPCC_IDLE, &idle); + if (top_sel == 0xf && opp_id == 0xf && idle) + return true; + else + return false; } -static void mpc10_assert_mpcc_idle_before_connect(struct dcn10_mpc *mpc10, int id) +void mpc1_assert_mpcc_idle_before_connect(struct mpc *mpc, int mpcc_id) { + struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); unsigned int top_sel, mpc_busy, mpc_idle; - REG_GET(MPCC_TOP_SEL[id], + REG_GET(MPCC_TOP_SEL[mpcc_id], MPCC_TOP_SEL, &top_sel); if (top_sel == 0xf) { - REG_GET_2(MPCC_STATUS[id], + REG_GET_2(MPCC_STATUS[mpcc_id], MPCC_BUSY, &mpc_busy, MPCC_IDLE, &mpc_idle); @@ -116,241 +155,269 @@ static void mpc10_assert_mpcc_idle_before_connect(struct dcn10_mpc *mpc10, int i } } -void mpc10_mpcc_remove( - struct mpc *mpc, - struct mpc_tree_cfg *tree_cfg, - int opp_id, - int dpp_id) -{ - struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); - int mpcc_id, z_idx; - - /* find z_idx for the dpp to be removed */ - for (z_idx = 0; z_idx < tree_cfg->num_pipes; z_idx++) - if (tree_cfg->dpp[z_idx] == dpp_id) - break; - - if (z_idx == tree_cfg->num_pipes) { - /* In case of resume from S3/S4, remove mpcc from bios left over */ - REG_SET(MPCC_OPP_ID[dpp_id], 0, - MPCC_OPP_ID, 0xf); - REG_SET(MPCC_TOP_SEL[dpp_id], 0, - MPCC_TOP_SEL, 0xf); - REG_SET(MPCC_BOT_SEL[dpp_id], 0, - MPCC_BOT_SEL, 0xf); - return; - } - - mpcc_id = tree_cfg->mpcc[z_idx]; - - REG_SET(MPCC_OPP_ID[mpcc_id], 0, - MPCC_OPP_ID, 0xf); - REG_SET(MPCC_TOP_SEL[mpcc_id], 0, - MPCC_TOP_SEL, 0xf); - REG_SET(MPCC_BOT_SEL[mpcc_id], 0, - MPCC_BOT_SEL, 0xf); - - if (z_idx > 0) { - int top_mpcc_id = tree_cfg->mpcc[z_idx - 1]; - - if (z_idx + 1 < tree_cfg->num_pipes) - /* mpcc to be removed is in the middle of the tree */ - REG_SET(MPCC_BOT_SEL[top_mpcc_id], 0, - MPCC_BOT_SEL, tree_cfg->mpcc[z_idx + 1]); - else { - /* mpcc to be removed is at the bottom of the tree */ - REG_SET(MPCC_BOT_SEL[top_mpcc_id], 0, - MPCC_BOT_SEL, 0xf); - REG_UPDATE(MPCC_CONTROL[top_mpcc_id], - MPCC_MODE, MODE_TOP_ONLY); - } - } else if (tree_cfg->num_pipes > 1) - /* mpcc to be removed is at the top of the tree */ - REG_SET(MUX[opp_id], 0, - MPC_OUT_MUX, tree_cfg->mpcc[z_idx + 1]); - else - /* mpcc to be removed is the only one in the tree */ - REG_SET(MUX[opp_id], 0, MPC_OUT_MUX, 0xf); - - /* mark this mpcc as not in use */ - mpc10->mpcc_in_use_mask &= ~(1 << mpcc_id); - tree_cfg->num_pipes--; - for (; z_idx < tree_cfg->num_pipes; z_idx++) { - tree_cfg->dpp[z_idx] = tree_cfg->dpp[z_idx + 1]; - tree_cfg->mpcc[z_idx] = tree_cfg->mpcc[z_idx + 1]; - } - tree_cfg->dpp[tree_cfg->num_pipes] = 0xdeadbeef; - tree_cfg->mpcc[tree_cfg->num_pipes] = 0xdeadbeef; -} - -static void mpc10_add_to_tree_cfg( +/* + * Insert DPP into MPC tree based on specified blending position. + * Only used for planes that are part of blending chain for OPP output + * + * Parameters: + * [in/out] mpc - MPC context. + * [in/out] tree - MPC tree structure that plane will be added to. + * [in] blnd_cfg - MPCC blending configuration for the new blending layer. + * [in] sm_cfg - MPCC stereo mix configuration for the new blending layer. + * stereo mix must disable for the very bottom layer of the tree config. + * [in] insert_above_mpcc - Insert new plane above this MPCC. If NULL, insert as bottom plane. + * [in] dpp_id - DPP instance for the plane to be added. + * [in] mpcc_id - The MPCC physical instance to use for blending. + * + * Return: struct mpcc* - MPCC that was added. + */ +struct mpcc *mpc1_insert_plane( struct mpc *mpc, - struct mpcc_cfg *cfg, + struct mpc_tree *tree, + struct mpcc_blnd_cfg *blnd_cfg, + struct mpcc_sm_cfg *sm_cfg, + struct mpcc *insert_above_mpcc, + int dpp_id, int mpcc_id) { struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); - int mpcc_mode = MODE_TOP_ONLY; - int position = cfg->z_index; - struct mpc_tree_cfg *tree_cfg = cfg->tree_cfg; - int alpha_blnd_mode = cfg->per_pixel_alpha ? - BLND_PP_ALPHA : BLND_GLOBAL_ALPHA; - int z_idx; + struct mpcc *new_mpcc = NULL; - REG_SET(MPCC_OPP_ID[mpcc_id], 0, - MPCC_OPP_ID, cfg->opp_id); + /* sanity check parameters */ + ASSERT(mpcc_id < mpc10->num_mpcc); + ASSERT(!(mpc10->mpcc_in_use_mask & 1 << mpcc_id)); - REG_SET(MPCC_TOP_SEL[mpcc_id], 0, - MPCC_TOP_SEL, cfg->dpp_id); + if (insert_above_mpcc) { + /* check insert_above_mpcc exist in tree->opp_list */ + struct mpcc *temp_mpcc = tree->opp_list; - if (position == 0) { - /* idle dpp/mpcc is added to the top layer of tree */ + while (temp_mpcc && temp_mpcc->mpcc_bot != insert_above_mpcc) + temp_mpcc = temp_mpcc->mpcc_bot; + if (temp_mpcc == NULL) + return NULL; + } - if (tree_cfg->num_pipes > 0) { - /* get instance of previous top mpcc */ - int prev_top_mpcc_id = tree_cfg->mpcc[0]; + /* Get and update MPCC struct parameters */ + new_mpcc = mpc1_get_mpcc(mpc, mpcc_id); + new_mpcc->dpp_id = dpp_id; - REG_SET(MPCC_BOT_SEL[mpcc_id], 0, - MPCC_BOT_SEL, prev_top_mpcc_id); - mpcc_mode = MODE_BLEND; + /* program mux and MPCC_MODE */ + if (insert_above_mpcc) { + new_mpcc->mpcc_bot = insert_above_mpcc; + REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, insert_above_mpcc->mpcc_id); + REG_UPDATE(MPCC_CONTROL[mpcc_id], MPCC_MODE, MPCC_BLEND_MODE_TOP_BOT_BLENDING); + } else { + new_mpcc->mpcc_bot = NULL; + REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf); + REG_UPDATE(MPCC_CONTROL[mpcc_id], MPCC_MODE, MPCC_BLEND_MODE_TOP_LAYER_PASSTHROUGH); + } + REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, dpp_id); + REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, tree->opp_id); + + /* update mpc tree mux setting */ + if (tree->opp_list == insert_above_mpcc) { + /* insert the toppest mpcc */ + tree->opp_list = new_mpcc; + REG_UPDATE(MUX[tree->opp_id], MPC_OUT_MUX, mpcc_id); + } else { + /* find insert position */ + struct mpcc *temp_mpcc = tree->opp_list; + + while (temp_mpcc && temp_mpcc->mpcc_bot != insert_above_mpcc) + temp_mpcc = temp_mpcc->mpcc_bot; + if (temp_mpcc && temp_mpcc->mpcc_bot == insert_above_mpcc) { + REG_SET(MPCC_BOT_SEL[temp_mpcc->mpcc_id], 0, MPCC_BOT_SEL, mpcc_id); + temp_mpcc->mpcc_bot = new_mpcc; + if (!insert_above_mpcc) + REG_UPDATE(MPCC_CONTROL[temp_mpcc->mpcc_id], + MPCC_MODE, MPCC_BLEND_MODE_TOP_BOT_BLENDING); } + } - /* opp will get new output. from new added mpcc */ - REG_SET(MUX[cfg->opp_id], 0, MPC_OUT_MUX, mpcc_id); - - } else if (position == tree_cfg->num_pipes) { - /* idle dpp/mpcc is added to the bottom layer of tree */ - - /* get instance of previous bottom mpcc, set to middle layer */ - int prev_bot_mpcc_id = tree_cfg->mpcc[tree_cfg->num_pipes - 1]; - - REG_SET(MPCC_BOT_SEL[prev_bot_mpcc_id], 0, - MPCC_BOT_SEL, mpcc_id); - REG_UPDATE(MPCC_CONTROL[prev_bot_mpcc_id], - MPCC_MODE, MODE_BLEND); - - /* mpcc_id become new bottom mpcc*/ - REG_SET(MPCC_BOT_SEL[mpcc_id], 0, - MPCC_BOT_SEL, 0xf); + /* update the blending configuration */ + new_mpcc->blnd_cfg = *blnd_cfg; + mpc->funcs->update_blending(mpc, &new_mpcc->blnd_cfg, mpcc_id); - } else { - /* idle dpp/mpcc is added to middle of tree */ - int above_mpcc_id = tree_cfg->mpcc[position - 1]; - int below_mpcc_id = tree_cfg->mpcc[position]; - - /* mpcc above new mpcc_id has new bottom mux*/ - REG_SET(MPCC_BOT_SEL[above_mpcc_id], 0, - MPCC_BOT_SEL, mpcc_id); - REG_UPDATE(MPCC_CONTROL[above_mpcc_id], - MPCC_MODE, MODE_BLEND); - - /* mpcc_id bottom mux is from below mpcc*/ - REG_SET(MPCC_BOT_SEL[mpcc_id], 0, - MPCC_BOT_SEL, below_mpcc_id); - mpcc_mode = MODE_BLEND; + /* update the stereo mix settings, if provided */ + if (sm_cfg != NULL) { + new_mpcc->sm_cfg = *sm_cfg; + mpc1_update_stereo_mix(mpc, sm_cfg, mpcc_id); } - REG_SET_4(MPCC_CONTROL[mpcc_id], 0xffffffff, - MPCC_MODE, mpcc_mode, - MPCC_ALPHA_BLND_MODE, alpha_blnd_mode, - MPCC_ALPHA_MULTIPLIED_MODE, cfg->pre_multiplied_alpha, - MPCC_BLND_ACTIVE_OVERLAP_ONLY, false); + /* mark this mpcc as in use */ + mpc10->mpcc_in_use_mask |= 1 << mpcc_id; - /* update mpc_tree_cfg with new mpcc */ - for (z_idx = tree_cfg->num_pipes; z_idx > position; z_idx--) { - tree_cfg->dpp[z_idx] = tree_cfg->dpp[z_idx - 1]; - tree_cfg->mpcc[z_idx] = tree_cfg->mpcc[z_idx - 1]; - } - tree_cfg->dpp[position] = cfg->dpp_id; - tree_cfg->mpcc[position] = mpcc_id; - tree_cfg->num_pipes++; + return new_mpcc; } -int mpc10_mpcc_add(struct mpc *mpc, struct mpcc_cfg *cfg) +/* + * Remove a specified MPCC from the MPC tree. + * + * Parameters: + * [in/out] mpc - MPC context. + * [in/out] tree - MPC tree structure that plane will be removed from. + * [in/out] mpcc - MPCC to be removed from tree. + * + * Return: void + */ +void mpc1_remove_mpcc( + struct mpc *mpc, + struct mpc_tree *tree, + struct mpcc *mpcc_to_remove) { struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); - int mpcc_id, z_idx; - - ASSERT(cfg->z_index < mpc10->num_mpcc); - - /* check in dpp already exists in mpc tree */ - for (z_idx = 0; z_idx < cfg->tree_cfg->num_pipes; z_idx++) - if (cfg->tree_cfg->dpp[z_idx] == cfg->dpp_id) - break; - if (z_idx == cfg->tree_cfg->num_pipes) { - ASSERT(cfg->z_index <= cfg->tree_cfg->num_pipes); - mpcc_id = mpc10_get_idle_mpcc_id(mpc10); - - /* - * TODO: remove hack - * Note: currently there is a bug in init_hw such that - * on resume from hibernate, BIOS sets up MPCC0, and - * we do mpcc_remove but the mpcc cannot go to idle - * after remove. This cause us to pick mpcc1 here, - * which causes a pstate hang for yet unknown reason. - */ - mpcc_id = cfg->dpp_id; - /* end hack*/ - - ASSERT(!(mpc10->mpcc_in_use_mask & 1 << mpcc_id)); - - if (mpc->ctx->dc->debug.sanity_checks) - mpc10_assert_mpcc_idle_before_connect(mpc10, mpcc_id); + bool found = false; + int mpcc_id = mpcc_to_remove->mpcc_id; + + if (tree->opp_list == mpcc_to_remove) { + found = true; + /* remove MPCC from top of tree */ + if (mpcc_to_remove->mpcc_bot) { + /* set the next MPCC in list to be the top MPCC */ + tree->opp_list = mpcc_to_remove->mpcc_bot; + REG_UPDATE(MUX[tree->opp_id], MPC_OUT_MUX, tree->opp_list->mpcc_id); + } else { + /* there are no other MPCC is list */ + tree->opp_list = NULL; + REG_UPDATE(MUX[tree->opp_id], MPC_OUT_MUX, 0xf); + } } else { - ASSERT(cfg->z_index < cfg->tree_cfg->num_pipes); - mpcc_id = cfg->tree_cfg->mpcc[z_idx]; - mpc10_mpcc_remove(mpc, cfg->tree_cfg, cfg->opp_id, cfg->dpp_id); + /* find mpcc to remove MPCC list */ + struct mpcc *temp_mpcc = tree->opp_list; + + while (temp_mpcc && temp_mpcc->mpcc_bot != mpcc_to_remove) + temp_mpcc = temp_mpcc->mpcc_bot; + + if (temp_mpcc && temp_mpcc->mpcc_bot == mpcc_to_remove) { + found = true; + temp_mpcc->mpcc_bot = mpcc_to_remove->mpcc_bot; + if (mpcc_to_remove->mpcc_bot) { + /* remove MPCC in middle of list */ + REG_SET(MPCC_BOT_SEL[temp_mpcc->mpcc_id], 0, + MPCC_BOT_SEL, mpcc_to_remove->mpcc_bot->mpcc_id); + } else { + /* remove MPCC from bottom of list */ + REG_SET(MPCC_BOT_SEL[temp_mpcc->mpcc_id], 0, + MPCC_BOT_SEL, 0xf); + REG_UPDATE(MPCC_CONTROL[temp_mpcc->mpcc_id], + MPCC_MODE, MPCC_BLEND_MODE_TOP_LAYER_PASSTHROUGH); + } + } } - /* add dpp/mpcc pair to mpc_tree_cfg and update mpcc registers */ - mpc10_add_to_tree_cfg(mpc, cfg, mpcc_id); + if (found) { + /* turn off MPCC mux registers */ + REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf); + REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf); + REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, 0xf); - /* set background color */ - mpc10_set_bg_color(mpc10, &cfg->black_color, mpcc_id); - - /* mark this mpcc as in use */ - mpc10->mpcc_in_use_mask |= 1 << mpcc_id; + /* mark this mpcc as not in use */ + mpc10->mpcc_in_use_mask &= ~(1 << mpcc_id); + mpcc_to_remove->dpp_id = 0xf; + mpcc_to_remove->mpcc_bot = NULL; + } else { + /* In case of resume from S3/S4, remove mpcc from bios left over */ + REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf); + REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf); + REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, 0xf); + } +} - return mpcc_id; +static void mpc1_init_mpcc(struct mpcc *mpcc, int mpcc_inst) +{ + mpcc->mpcc_id = mpcc_inst; + mpcc->dpp_id = 0xf; + mpcc->mpcc_bot = NULL; + mpcc->blnd_cfg.overlap_only = false; + mpcc->blnd_cfg.global_alpha = 0xff; + mpcc->blnd_cfg.global_gain = 0xff; + mpcc->sm_cfg.enable = false; } -void mpc10_update_blend_mode( - struct mpc *mpc, - struct mpcc_cfg *cfg) +/* + * Reset the MPCC HW status by disconnecting all muxes. + * + * Parameters: + * [in/out] mpc - MPC context. + * + * Return: void + */ +void mpc1_mpc_init(struct mpc *mpc) { struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); - int mpcc_id, z_idx; - int alpha_blnd_mode = cfg->per_pixel_alpha ? - BLND_PP_ALPHA : BLND_GLOBAL_ALPHA; + int mpcc_id; + int opp_id; - /* find z_idx for the dpp that requires blending mode update*/ - for (z_idx = 0; z_idx < cfg->tree_cfg->num_pipes; z_idx++) - if (cfg->tree_cfg->dpp[z_idx] == cfg->dpp_id) - break; + mpc10->mpcc_in_use_mask = 0; + for (mpcc_id = 0; mpcc_id < mpc10->num_mpcc; mpcc_id++) { + REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf); + REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf); + REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, 0xf); - ASSERT(z_idx < cfg->tree_cfg->num_pipes); - mpcc_id = cfg->tree_cfg->mpcc[z_idx]; + mpc1_init_mpcc(&(mpc->mpcc_array[mpcc_id]), mpcc_id); + } - REG_UPDATE_2(MPCC_CONTROL[mpcc_id], - MPCC_ALPHA_BLND_MODE, alpha_blnd_mode, - MPCC_ALPHA_MULTIPLIED_MODE, cfg->pre_multiplied_alpha); + for (opp_id = 0; opp_id < MAX_OPP; opp_id++) { + if (REG(MUX[opp_id])) + REG_UPDATE(MUX[opp_id], MPC_OUT_MUX, 0xf); + } } -int mpc10_get_opp_id(struct mpc *mpc, int mpcc_id) +void mpc1_init_mpcc_list_from_hw( + struct mpc *mpc, + struct mpc_tree *tree) { struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); - int opp_id = 0xF; - - REG_GET(MPCC_OPP_ID[mpcc_id], MPCC_OPP_ID, &opp_id); - - return opp_id; + unsigned int opp_id; + unsigned int top_sel; + unsigned int bot_sel; + unsigned int out_mux; + struct mpcc *mpcc; + int mpcc_id; + int bot_mpcc_id; + + REG_GET(MUX[tree->opp_id], MPC_OUT_MUX, &out_mux); + + if (out_mux != 0xf) { + for (mpcc_id = 0; mpcc_id < mpc10->num_mpcc; mpcc_id++) { + REG_GET(MPCC_OPP_ID[mpcc_id], MPCC_OPP_ID, &opp_id); + REG_GET(MPCC_TOP_SEL[mpcc_id], MPCC_TOP_SEL, &top_sel); + REG_GET(MPCC_BOT_SEL[mpcc_id], MPCC_BOT_SEL, &bot_sel); + + if (bot_sel == mpcc_id) + bot_sel = 0xf; + + if ((opp_id == tree->opp_id) && (top_sel != 0xf)) { + mpcc = mpc1_get_mpcc(mpc, mpcc_id); + mpcc->dpp_id = top_sel; + mpc10->mpcc_in_use_mask |= 1 << mpcc_id; + + if (out_mux == mpcc_id) + tree->opp_list = mpcc; + if (bot_sel != 0xf && bot_sel < mpc10->num_mpcc) { + bot_mpcc_id = bot_sel; + REG_GET(MPCC_OPP_ID[bot_mpcc_id], MPCC_OPP_ID, &opp_id); + REG_GET(MPCC_TOP_SEL[bot_mpcc_id], MPCC_TOP_SEL, &top_sel); + if ((opp_id == tree->opp_id) && (top_sel != 0xf)) { + struct mpcc *mpcc_bottom = mpc1_get_mpcc(mpc, bot_mpcc_id); + + mpcc->mpcc_bot = mpcc_bottom; + } + } + } + } + } } const struct mpc_funcs dcn10_mpc_funcs = { - .add = mpc10_mpcc_add, - .remove = mpc10_mpcc_remove, - .wait_for_idle = mpc10_assert_idle_mpcc, - .update_blend_mode = mpc10_update_blend_mode, - .get_opp_id = mpc10_get_opp_id, + .insert_plane = mpc1_insert_plane, + .remove_mpcc = mpc1_remove_mpcc, + .mpc_init = mpc1_mpc_init, + .get_mpcc_for_dpp = mpc1_get_mpcc_for_dpp, + .wait_for_idle = mpc1_assert_idle_mpcc, + .assert_mpcc_idle_before_connect = mpc1_assert_mpcc_idle_before_connect, + .init_mpcc_list_from_hw = mpc1_init_mpcc_list_from_hw, + .update_blending = mpc1_update_blending, }; void dcn10_mpc_construct(struct dcn10_mpc *mpc10, @@ -360,6 +427,8 @@ void dcn10_mpc_construct(struct dcn10_mpc *mpc10, const struct dcn_mpc_mask *mpc_mask, int num_mpcc) { + int i; + mpc10->base.ctx = ctx; mpc10->base.funcs = &dcn10_mpc_funcs; @@ -370,5 +439,8 @@ void dcn10_mpc_construct(struct dcn10_mpc *mpc10, mpc10->mpcc_in_use_mask = 0; mpc10->num_mpcc = num_mpcc; + + for (i = 0; i < MAX_MPCC; i++) + mpc1_init_mpcc(&mpc10->base.mpcc_array[i], i); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h index e85e1f342266b7bcad3f7a11ae3c4fb91c2f24b7..267a2995ef6e74b2e05ec492c1a8aff51c4a8bf6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h @@ -30,9 +30,6 @@ #define TO_DCN10_MPC(mpc_base) \ container_of(mpc_base, struct dcn10_mpc, base) -#define MAX_MPCC 6 -#define MAX_OPP 6 - #define MPC_COMMON_REG_LIST_DCN1_0(inst) \ SRII(MPCC_TOP_SEL, MPCC, inst),\ SRII(MPCC_BOT_SEL, MPCC, inst),\ @@ -42,7 +39,8 @@ SRII(MPCC_BG_G_Y, MPCC, inst),\ SRII(MPCC_BG_R_CR, MPCC, inst),\ SRII(MPCC_BG_B_CB, MPCC, inst),\ - SRII(MPCC_BG_B_CB, MPCC, inst) + SRII(MPCC_BG_B_CB, MPCC, inst),\ + SRII(MPCC_SM_CONTROL, MPCC, inst) #define MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0(inst) \ SRII(MUX, MPC_OUT, inst) @@ -56,6 +54,7 @@ uint32_t MPCC_BG_G_Y[MAX_MPCC]; \ uint32_t MPCC_BG_R_CR[MAX_MPCC]; \ uint32_t MPCC_BG_B_CB[MAX_MPCC]; \ + uint32_t MPCC_SM_CONTROL[MAX_MPCC]; \ uint32_t MUX[MAX_OPP]; #define MPC_COMMON_MASK_SH_LIST_DCN1_0(mask_sh)\ @@ -65,12 +64,20 @@ SF(MPCC0_MPCC_CONTROL, MPCC_ALPHA_BLND_MODE, mask_sh),\ SF(MPCC0_MPCC_CONTROL, MPCC_ALPHA_MULTIPLIED_MODE, mask_sh),\ SF(MPCC0_MPCC_CONTROL, MPCC_BLND_ACTIVE_OVERLAP_ONLY, mask_sh),\ + SF(MPCC0_MPCC_CONTROL, MPCC_GLOBAL_ALPHA, mask_sh),\ + SF(MPCC0_MPCC_CONTROL, MPCC_GLOBAL_GAIN, mask_sh),\ SF(MPCC0_MPCC_STATUS, MPCC_IDLE, mask_sh),\ SF(MPCC0_MPCC_STATUS, MPCC_BUSY, mask_sh),\ SF(MPCC0_MPCC_OPP_ID, MPCC_OPP_ID, mask_sh),\ SF(MPCC0_MPCC_BG_G_Y, MPCC_BG_G_Y, mask_sh),\ SF(MPCC0_MPCC_BG_R_CR, MPCC_BG_R_CR, mask_sh),\ SF(MPCC0_MPCC_BG_B_CB, MPCC_BG_B_CB, mask_sh),\ + SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_EN, mask_sh),\ + SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_MODE, mask_sh),\ + SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FRAME_ALT, mask_sh),\ + SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FIELD_ALT, mask_sh),\ + SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FORCE_NEXT_FRAME_POL, mask_sh),\ + SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FORCE_NEXT_TOP_POL, mask_sh),\ SF(MPC_OUT0_MUX, MPC_OUT_MUX, mask_sh) #define MPC_REG_FIELD_LIST(type) \ @@ -80,12 +87,20 @@ type MPCC_ALPHA_BLND_MODE;\ type MPCC_ALPHA_MULTIPLIED_MODE;\ type MPCC_BLND_ACTIVE_OVERLAP_ONLY;\ + type MPCC_GLOBAL_ALPHA;\ + type MPCC_GLOBAL_GAIN;\ type MPCC_IDLE;\ type MPCC_BUSY;\ type MPCC_OPP_ID;\ type MPCC_BG_G_Y;\ type MPCC_BG_R_CR;\ type MPCC_BG_B_CB;\ + type MPCC_SM_EN;\ + type MPCC_SM_MODE;\ + type MPCC_SM_FRAME_ALT;\ + type MPCC_SM_FIELD_ALT;\ + type MPCC_SM_FORCE_NEXT_FRAME_POL;\ + type MPCC_SM_FORCE_NEXT_TOP_POL;\ type MPC_OUT_MUX; struct dcn_mpc_registers { @@ -117,23 +132,55 @@ void dcn10_mpc_construct(struct dcn10_mpc *mpcc10, const struct dcn_mpc_mask *mpc_mask, int num_mpcc); -int mpc10_mpcc_add( - struct mpc *mpc, - struct mpcc_cfg *cfg); - -void mpc10_mpcc_remove( - struct mpc *mpc, - struct mpc_tree_cfg *tree_cfg, - int opp_id, - int dpp_id); - -void mpc10_assert_idle_mpcc( - struct mpc *mpc, - int id); - -void mpc10_update_blend_mode( - struct mpc *mpc, - struct mpcc_cfg *cfg); -int mpc10_get_opp_id(struct mpc *mpc, int mpcc_id); +struct mpcc *mpc1_insert_plane( + struct mpc *mpc, + struct mpc_tree *tree, + struct mpcc_blnd_cfg *blnd_cfg, + struct mpcc_sm_cfg *sm_cfg, + struct mpcc *insert_above_mpcc, + int dpp_id, + int mpcc_id); + +void mpc1_remove_mpcc( + struct mpc *mpc, + struct mpc_tree *tree, + struct mpcc *mpcc); + +void mpc1_mpc_init( + struct mpc *mpc); + +void mpc1_assert_idle_mpcc( + struct mpc *mpc, + int id); + +void mpc1_set_bg_color( + struct mpc *mpc, + struct tg_color *bg_color, + int id); + +void mpc1_update_stereo_mix( + struct mpc *mpc, + struct mpcc_sm_cfg *sm_cfg, + int mpcc_id); + +bool mpc1_is_mpcc_idle( + struct mpc *mpc, + int mpcc_id); + +void mpc1_assert_mpcc_idle_before_connect( + struct mpc *mpc, + int mpcc_id); + +void mpc1_init_mpcc_list_from_hw( + struct mpc *mpc, + struct mpc_tree *tree); + +struct mpcc *mpc1_get_mpcc( + struct mpc *mpc, + int mpcc_id); + +struct mpcc *mpc1_get_mpcc_for_dpp( + struct mpc_tree *tree, + int dpp_id); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c index 6d6f67b7d30e1bc7533175eb33f2b1c4c3d8f73f..f6ba0eef4489b6f72fb97069eb90ec8b76631c9a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c @@ -296,13 +296,75 @@ void opp1_program_fmt( return; } -void opp1_set_stereo_polarity( - struct output_pixel_processor *opp, - bool enable, bool rightEyePolarity) +void opp1_program_stereo( + struct output_pixel_processor *opp, + bool enable, + const struct dc_crtc_timing *timing) { struct dcn10_opp *oppn10 = TO_DCN10_OPP(opp); - REG_UPDATE(FMT_CONTROL, FMT_STEREOSYNC_OVERRIDE, enable); + uint32_t active_width = timing->h_addressable - timing->h_border_right - timing->h_border_right; + uint32_t space1_size = timing->v_total - timing->v_addressable; + /* TODO: confirm computation of space2_size */ + uint32_t space2_size = timing->v_total - timing->v_addressable; + + if (!enable) { + active_width = 0; + space1_size = 0; + space2_size = 0; + } + + /* TODO: for which cases should FMT_STEREOSYNC_OVERRIDE be set? */ + REG_UPDATE(FMT_CONTROL, FMT_STEREOSYNC_OVERRIDE, 0); + + REG_UPDATE(OPPBUF_CONTROL, OPPBUF_ACTIVE_WIDTH, active_width); + + /* Program OPPBUF_3D_VACT_SPACE1_SIZE and OPPBUF_VACT_SPACE2_SIZE registers + * In 3D progressive frames, Vactive space happens only in between the 2 frames, + * so only need to program OPPBUF_3D_VACT_SPACE1_SIZE + * In 3D alternative frames, left and right frames, top and bottom field. + */ + if (timing->timing_3d_format == TIMING_3D_FORMAT_FRAME_ALTERNATE) + REG_UPDATE(OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE2_SIZE, space2_size); + else + REG_UPDATE(OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE1_SIZE, space1_size); + + /* TODO: Is programming of OPPBUF_DUMMY_DATA_R/G/B needed? */ + /* + REG_UPDATE(OPPBUF_3D_PARAMETERS_0, + OPPBUF_DUMMY_DATA_R, data_r); + REG_UPDATE(OPPBUF_3D_PARAMETERS_1, + OPPBUF_DUMMY_DATA_G, data_g); + REG_UPDATE(OPPBUF_3D_PARAMETERS_1, + OPPBUF_DUMMY_DATA_B, _data_b); + */ +} + +void opp1_program_oppbuf( + struct output_pixel_processor *opp, + struct oppbuf_params *oppbuf) +{ + struct dcn10_opp *oppn10 = TO_DCN10_OPP(opp); + + /* Program the oppbuf active width to be the frame width from mpc */ + REG_UPDATE(OPPBUF_CONTROL, OPPBUF_ACTIVE_WIDTH, oppbuf->active_width); + + /* Specifies the number of segments in multi-segment mode (DP-MSO operation) + * description "In 1/2/4 segment mode, specifies the horizontal active width in pixels of the display panel. + * In 4 segment split left/right mode, specifies the horizontal 1/2 active width in pixels of the display panel. + * Used to determine segment boundaries in multi-segment mode. Used to determine the width of the vertical active space in 3D frame packed modes. + * OPPBUF_ACTIVE_WIDTH must be integer divisible by the total number of segments." + */ + REG_UPDATE(OPPBUF_CONTROL, OPPBUF_DISPLAY_SEGMENTATION, oppbuf->mso_segmentation); + + /* description "Specifies the number of overlap pixels (1-8 overlapping pixels supported), used in multi-segment mode (DP-MSO operation)" */ + REG_UPDATE(OPPBUF_CONTROL, OPPBUF_OVERLAP_PIXEL_NUM, oppbuf->mso_overlap_pixel_num); + + /* description "Specifies the number of times a pixel is replicated (0-15 pixel replications supported). + * A value of 0 disables replication. The total number of times a pixel is output is OPPBUF_PIXEL_REPETITION + 1." + */ + REG_UPDATE(OPPBUF_CONTROL, OPPBUF_PIXEL_REPETITION, oppbuf->pixel_repetition); + } /*****************************************/ @@ -319,7 +381,7 @@ static struct opp_funcs dcn10_opp_funcs = { .opp_set_dyn_expansion = opp1_set_dyn_expansion, .opp_program_fmt = opp1_program_fmt, .opp_program_bit_depth_reduction = opp1_program_bit_depth_reduction, - .opp_set_stereo_polarity = opp1_set_stereo_polarity, + .opp_program_stereo = opp1_program_stereo, .opp_destroy = opp1_destroy }; @@ -330,6 +392,7 @@ void dcn10_opp_construct(struct dcn10_opp *oppn10, const struct dcn10_opp_shift *opp_shift, const struct dcn10_opp_mask *opp_mask) { + oppn10->base.ctx = ctx; oppn10->base.inst = inst; oppn10->base.funcs = &dcn10_opp_funcs; @@ -338,4 +401,3 @@ void dcn10_opp_construct(struct dcn10_opp *oppn10, oppn10->opp_shift = opp_shift; oppn10->opp_mask = opp_mask; } - diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h index f3c298ec37fb8d222c417a2efdf746f7dbae714b..bc5058af626627d62d9f6bf8266764ff75910a84 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.h @@ -41,7 +41,10 @@ SRI(FMT_DITHER_RAND_B_SEED, FMT, id), \ SRI(FMT_CLAMP_CNTL, FMT, id), \ SRI(FMT_DYNAMIC_EXP_CNTL, FMT, id), \ - SRI(FMT_MAP420_MEMORY_CONTROL, FMT, id) + SRI(FMT_MAP420_MEMORY_CONTROL, FMT, id), \ + SRI(OPPBUF_CONTROL, OPPBUF, id),\ + SRI(OPPBUF_3D_PARAMETERS_0, OPPBUF, id), \ + SRI(OPPBUF_3D_PARAMETERS_1, OPPBUF, id) #define OPP_REG_LIST_DCN10(id) \ OPP_REG_LIST_DCN(id) @@ -54,7 +57,11 @@ uint32_t FMT_DITHER_RAND_B_SEED; \ uint32_t FMT_CLAMP_CNTL; \ uint32_t FMT_DYNAMIC_EXP_CNTL; \ - uint32_t FMT_MAP420_MEMORY_CONTROL; + uint32_t FMT_MAP420_MEMORY_CONTROL; \ + uint32_t OPPBUF_CONTROL; \ + uint32_t OPPBUF_CONTROL1; \ + uint32_t OPPBUF_3D_PARAMETERS_0; \ + uint32_t OPPBUF_3D_PARAMETERS_1 #define OPP_MASK_SH_LIST_DCN(mask_sh) \ OPP_SF(FMT0_FMT_BIT_DEPTH_CONTROL, FMT_TRUNCATE_EN, mask_sh), \ @@ -78,10 +85,16 @@ OPP_SF(FMT0_FMT_CLAMP_CNTL, FMT_CLAMP_COLOR_FORMAT, mask_sh), \ OPP_SF(FMT0_FMT_DYNAMIC_EXP_CNTL, FMT_DYNAMIC_EXP_EN, mask_sh), \ OPP_SF(FMT0_FMT_DYNAMIC_EXP_CNTL, FMT_DYNAMIC_EXP_MODE, mask_sh), \ - OPP_SF(FMT0_FMT_MAP420_MEMORY_CONTROL, FMT_MAP420MEM_PWR_FORCE, mask_sh) + OPP_SF(FMT0_FMT_MAP420_MEMORY_CONTROL, FMT_MAP420MEM_PWR_FORCE, mask_sh), \ + OPP_SF(OPPBUF0_OPPBUF_CONTROL, OPPBUF_ACTIVE_WIDTH, mask_sh),\ + OPP_SF(OPPBUF0_OPPBUF_CONTROL, OPPBUF_PIXEL_REPETITION, mask_sh),\ + OPP_SF(OPPBUF0_OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE1_SIZE, mask_sh), \ + OPP_SF(OPPBUF0_OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE2_SIZE, mask_sh) #define OPP_MASK_SH_LIST_DCN10(mask_sh) \ - OPP_MASK_SH_LIST_DCN(mask_sh) + OPP_MASK_SH_LIST_DCN(mask_sh), \ + OPP_SF(OPPBUF0_OPPBUF_CONTROL, OPPBUF_DISPLAY_SEGMENTATION, mask_sh),\ + OPP_SF(OPPBUF0_OPPBUF_CONTROL, OPPBUF_OVERLAP_PIXEL_NUM, mask_sh) #define OPP_DCN10_REG_FIELD_LIST(type) \ type FMT_TRUNCATE_EN; \ @@ -105,18 +118,25 @@ type FMT_DYNAMIC_EXP_EN; \ type FMT_DYNAMIC_EXP_MODE; \ type FMT_MAP420MEM_PWR_FORCE; \ - type FMT_STEREOSYNC_OVERRIDE; + type FMT_STEREOSYNC_OVERRIDE; \ + type OPPBUF_ACTIVE_WIDTH;\ + type OPPBUF_PIXEL_REPETITION;\ + type OPPBUF_DISPLAY_SEGMENTATION;\ + type OPPBUF_OVERLAP_PIXEL_NUM;\ + type OPPBUF_NUM_SEGMENT_PADDED_PIXELS; \ + type OPPBUF_3D_VACT_SPACE1_SIZE; \ + type OPPBUF_3D_VACT_SPACE2_SIZE struct dcn10_opp_registers { - OPP_COMMON_REG_VARIABLE_LIST + OPP_COMMON_REG_VARIABLE_LIST; }; struct dcn10_opp_shift { - OPP_DCN10_REG_FIELD_LIST(uint8_t) + OPP_DCN10_REG_FIELD_LIST(uint8_t); }; struct dcn10_opp_mask { - OPP_DCN10_REG_FIELD_LIST(uint32_t) + OPP_DCN10_REG_FIELD_LIST(uint32_t); }; struct dcn10_opp { @@ -151,9 +171,10 @@ void opp1_program_bit_depth_reduction( struct output_pixel_processor *opp, const struct bit_depth_reduction_params *params); -void opp1_set_stereo_polarity( - struct output_pixel_processor *opp, - bool enable, bool rightEyePolarity); +void opp1_program_stereo( + struct output_pixel_processor *opp, + bool enable, + const struct dc_crtc_timing *timing); void opp1_destroy(struct output_pixel_processor **opp); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c similarity index 76% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c rename to drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 73ff78f9cae1cc1868afb23b3b10e34c26bfd54f..4bf64d1b2c60223ff8ad557f2202d6bf1890e346 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -23,19 +23,20 @@ * */ + #include "reg_helper.h" -#include "dcn10_timing_generator.h" +#include "dcn10_optc.h" #include "dc.h" #define REG(reg)\ - tgn10->tg_regs->reg + optc1->tg_regs->reg #define CTX \ - tgn10->base.ctx + optc1->base.ctx #undef FN #define FN(reg_name, field_name) \ - tgn10->tg_shift->field_name, tgn10->tg_mask->field_name + optc1->tg_shift->field_name, optc1->tg_mask->field_name #define STATIC_SCREEN_EVENT_MASK_RANGETIMING_DOUBLE_BUFFER_UPDATE_EN 0x100 @@ -45,8 +46,8 @@ * This is a workaround for a bug that has existed since R5xx and has not been * fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive. */ -static void tgn10_apply_front_porch_workaround( - struct timing_generator *tg, +static void optc1_apply_front_porch_workaround( + struct timing_generator *optc, struct dc_crtc_timing *timing) { if (timing->flags.INTERLACE == 1) { @@ -58,30 +59,30 @@ static void tgn10_apply_front_porch_workaround( } } -static void tgn10_program_global_sync( - struct timing_generator *tg) +void optc1_program_global_sync( + struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); - if (tg->dlg_otg_param.vstartup_start == 0) { + if (optc->dlg_otg_param.vstartup_start == 0) { BREAK_TO_DEBUGGER(); return; } REG_SET(OTG_VSTARTUP_PARAM, 0, - VSTARTUP_START, tg->dlg_otg_param.vstartup_start); + VSTARTUP_START, optc->dlg_otg_param.vstartup_start); REG_SET_2(OTG_VUPDATE_PARAM, 0, - VUPDATE_OFFSET, tg->dlg_otg_param.vupdate_offset, - VUPDATE_WIDTH, tg->dlg_otg_param.vupdate_width); + VUPDATE_OFFSET, optc->dlg_otg_param.vupdate_offset, + VUPDATE_WIDTH, optc->dlg_otg_param.vupdate_width); REG_SET(OTG_VREADY_PARAM, 0, - VREADY_OFFSET, tg->dlg_otg_param.vready_offset); + VREADY_OFFSET, optc->dlg_otg_param.vready_offset); } -static void tgn10_disable_stereo(struct timing_generator *tg) +static void optc1_disable_stereo(struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); REG_SET(OTG_STEREO_CONTROL, 0, OTG_STEREO_EN, 0); @@ -90,11 +91,6 @@ static void tgn10_disable_stereo(struct timing_generator *tg) OTG_3D_STRUCTURE_EN, 0, OTG_3D_STRUCTURE_V_UPDATE_MODE, 0, OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0); - - REG_UPDATE(OPPBUF_CONTROL, - OPPBUF_ACTIVE_WIDTH, 0); - REG_UPDATE(OPPBUF_3D_PARAMETERS_0, - OPPBUF_3D_VACT_SPACE1_SIZE, 0); } /** @@ -102,8 +98,8 @@ static void tgn10_disable_stereo(struct timing_generator *tg) * Program CRTC Timing Registers - OTG_H_*, OTG_V_*, Pixel repetition. * Including SYNC. Call BIOS command table to program Timings. */ -static void tgn10_program_timing( - struct timing_generator *tg, +void optc1_program_timing( + struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing, bool use_vbios) { @@ -121,10 +117,10 @@ static void tgn10_program_timing( uint32_t h_div_2; int32_t vertical_line_start; - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); patched_crtc_timing = *dc_crtc_timing; - tgn10_apply_front_porch_workaround(tg, &patched_crtc_timing); + optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); /* Load horizontal timing */ @@ -217,7 +213,7 @@ static void tgn10_program_timing( /* Use OTG_VERTICAL_INTERRUPT2 replace VUPDATE interrupt, * program the reg for interrupt postition. */ - vertical_line_start = asic_blank_end - tg->dlg_otg_param.vstartup_start + 1; + vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; if (vertical_line_start < 0) { ASSERT(0); vertical_line_start = 0; @@ -233,26 +229,25 @@ static void tgn10_program_timing( OTG_V_SYNC_A_POL, v_sync_polarity); v_init = asic_blank_start; - if (tg->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT || - tg->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT_MST || - tg->dlg_otg_param.signal == SIGNAL_TYPE_EDP) { + if (optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT || + optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT_MST || + optc->dlg_otg_param.signal == SIGNAL_TYPE_EDP) { start_point = 1; if (patched_crtc_timing.flags.INTERLACE == 1) field_num = 1; } v_fp2 = 0; - if (tg->dlg_otg_param.vstartup_start > asic_blank_end) - v_fp2 = tg->dlg_otg_param.vstartup_start > asic_blank_end; + if (optc->dlg_otg_param.vstartup_start > asic_blank_end) + v_fp2 = optc->dlg_otg_param.vstartup_start > asic_blank_end; /* Interlace */ if (patched_crtc_timing.flags.INTERLACE == 1) { REG_UPDATE(OTG_INTERLACE_CONTROL, OTG_INTERLACE_ENABLE, 1); v_init = v_init / 2; - if ((tg->dlg_otg_param.vstartup_start/2)*2 > asic_blank_end) + if ((optc->dlg_otg_param.vstartup_start/2)*2 > asic_blank_end) v_fp2 = v_fp2 / 2; - } - else + } else REG_UPDATE(OTG_INTERLACE_CONTROL, OTG_INTERLACE_ENABLE, 0); @@ -270,13 +265,13 @@ static void tgn10_program_timing( OTG_START_POINT_CNTL, start_point, OTG_FIELD_NUMBER_CNTL, field_num); - tgn10_program_global_sync(tg); + optc1_program_global_sync(optc); /* TODO * patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1 * program_horz_count_by_2 * for DVI 30bpp mode, 0 otherwise - * program_horz_count_by_2(tg, &patched_crtc_timing); + * program_horz_count_by_2(optc, &patched_crtc_timing); */ /* Enable stereo - only when we need to pack 3D frame. Other types @@ -290,9 +285,9 @@ static void tgn10_program_timing( } -static void tgn10_set_blank_data_double_buffer(struct timing_generator *tg, bool enable) +static void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); uint32_t blank_data_double_buffer_enable = enable ? 1 : 0; @@ -304,9 +299,9 @@ static void tgn10_set_blank_data_double_buffer(struct timing_generator *tg, bool * unblank_crtc * Call ASIC Control Object to UnBlank CRTC. */ -static void tgn10_unblank_crtc(struct timing_generator *tg) +static void optc1_unblank_crtc(struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); uint32_t vertical_interrupt_enable = 0; REG_GET(OTG_VERTICAL_INTERRUPT2_CONTROL, @@ -316,7 +311,7 @@ static void tgn10_unblank_crtc(struct timing_generator *tg) * this check will be removed. */ if (vertical_interrupt_enable) - tgn10_set_blank_data_double_buffer(tg, true); + optc1_set_blank_data_double_buffer(optc, true); REG_UPDATE_2(OTG_BLANK_CONTROL, OTG_BLANK_DATA_EN, 0, @@ -328,36 +323,29 @@ static void tgn10_unblank_crtc(struct timing_generator *tg) * Call ASIC Control Object to Blank CRTC. */ -static void tgn10_blank_crtc(struct timing_generator *tg) +static void optc1_blank_crtc(struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); REG_UPDATE_2(OTG_BLANK_CONTROL, OTG_BLANK_DATA_EN, 1, OTG_BLANK_DE_MODE, 0); - /* todo: why are we waiting for BLANK_DATA_EN? shouldn't we be waiting - * for status? - */ - REG_WAIT(OTG_BLANK_CONTROL, - OTG_BLANK_DATA_EN, 1, - 1, 100000); - - tgn10_set_blank_data_double_buffer(tg, false); + optc1_set_blank_data_double_buffer(optc, false); } -static void tgn10_set_blank(struct timing_generator *tg, +void optc1_set_blank(struct timing_generator *optc, bool enable_blanking) { if (enable_blanking) - tgn10_blank_crtc(tg); + optc1_blank_crtc(optc); else - tgn10_unblank_crtc(tg); + optc1_unblank_crtc(optc); } -static bool tgn10_is_blanked(struct timing_generator *tg) +bool optc1_is_blanked(struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); uint32_t blank_en; uint32_t blank_state; @@ -368,9 +356,9 @@ static bool tgn10_is_blanked(struct timing_generator *tg) return blank_en && blank_state; } -static void tgn10_enable_optc_clock(struct timing_generator *tg, bool enable) +void optc1_enable_optc_clock(struct timing_generator *optc, bool enable) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); if (enable) { REG_UPDATE_2(OPTC_INPUT_CLOCK_CONTROL, @@ -403,19 +391,19 @@ static void tgn10_enable_optc_clock(struct timing_generator *tg, bool enable) * Enable CRTC * Enable CRTC - call ASIC Control Object to enable Timing generator. */ -static bool tgn10_enable_crtc(struct timing_generator *tg) +static bool optc1_enable_crtc(struct timing_generator *optc) { /* TODO FPGA wait for answer * OTG_MASTER_UPDATE_MODE != CRTC_MASTER_UPDATE_MODE * OTG_MASTER_UPDATE_LOCK != CRTC_MASTER_UPDATE_LOCK */ - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); /* opp instance for OTG. For DCN1.0, ODM is remoed. * OPP and OPTC should 1:1 mapping */ REG_UPDATE(OPTC_DATA_SOURCE_SELECT, - OPTC_SRC_SEL, tg->inst); + OPTC_SRC_SEL, optc->inst); /* VTG enable first is for HW workaround */ REG_UPDATE(CONTROL, @@ -430,9 +418,9 @@ static bool tgn10_enable_crtc(struct timing_generator *tg) } /* disable_crtc - call ASIC Control Object to disable Timing generator. */ -static bool tgn10_disable_crtc(struct timing_generator *tg) +bool optc1_disable_crtc(struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); /* disable otg request until end of the first line * in the vertical blank region @@ -453,11 +441,11 @@ static bool tgn10_disable_crtc(struct timing_generator *tg) } -static void tgn10_program_blank_color( - struct timing_generator *tg, +void optc1_program_blank_color( + struct timing_generator *optc, const struct tg_color *black_color) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); REG_SET_3(OTG_BLACK_COLOR, 0, OTG_BLACK_COLOR_B_CB, black_color->color_b_cb, @@ -465,15 +453,15 @@ static void tgn10_program_blank_color( OTG_BLACK_COLOR_R_CR, black_color->color_r_cr); } -static bool tgn10_validate_timing( - struct timing_generator *tg, +bool optc1_validate_timing( + struct timing_generator *optc, const struct dc_crtc_timing *timing) { uint32_t interlace_factor; uint32_t v_blank; uint32_t h_blank; uint32_t min_v_blank; - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); ASSERT(timing != NULL); @@ -503,19 +491,19 @@ static bool tgn10_validate_timing( * needs more than 8192 horizontal and * more than 8192 vertical total pixels) */ - if (timing->h_total > tgn10->max_h_total || - timing->v_total > tgn10->max_v_total) + if (timing->h_total > optc1->max_h_total || + timing->v_total > optc1->max_v_total) return false; - if (h_blank < tgn10->min_h_blank) + if (h_blank < optc1->min_h_blank) return false; - if (timing->h_sync_width < tgn10->min_h_sync_width || - timing->v_sync_width < tgn10->min_v_sync_width) + if (timing->h_sync_width < optc1->min_h_sync_width || + timing->v_sync_width < optc1->min_v_sync_width) return false; - min_v_blank = timing->flags.INTERLACE?tgn10->min_v_blank_interlace:tgn10->min_v_blank; + min_v_blank = timing->flags.INTERLACE?optc1->min_v_blank_interlace:optc1->min_v_blank; if (v_blank < min_v_blank) return false; @@ -532,15 +520,15 @@ static bool tgn10_validate_timing( * holds the counter of frames. * * @param - * struct timing_generator *tg - [in] timing generator which controls the + * struct timing_generator *optc - [in] timing generator which controls the * desired CRTC * * @return * Counter of frames, which should equal to number of vblanks. */ -static uint32_t tgn10_get_vblank_counter(struct timing_generator *tg) +uint32_t optc1_get_vblank_counter(struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); uint32_t frame_count; REG_GET(OTG_STATUS_FRAME_COUNT, @@ -549,34 +537,34 @@ static uint32_t tgn10_get_vblank_counter(struct timing_generator *tg) return frame_count; } -static void tgn10_lock(struct timing_generator *tg) +void optc1_lock(struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); REG_SET(OTG_GLOBAL_CONTROL0, 0, - OTG_MASTER_UPDATE_LOCK_SEL, tg->inst); + OTG_MASTER_UPDATE_LOCK_SEL, optc->inst); REG_SET(OTG_MASTER_UPDATE_LOCK, 0, OTG_MASTER_UPDATE_LOCK, 1); /* Should be fast, status does not update on maximus */ - if (tg->ctx->dce_environment != DCE_ENV_FPGA_MAXIMUS) + if (optc->ctx->dce_environment != DCE_ENV_FPGA_MAXIMUS) REG_WAIT(OTG_MASTER_UPDATE_LOCK, UPDATE_LOCK_STATUS, 1, 1, 10); } -static void tgn10_unlock(struct timing_generator *tg) +void optc1_unlock(struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); REG_SET(OTG_MASTER_UPDATE_LOCK, 0, OTG_MASTER_UPDATE_LOCK, 0); } -static void tgn10_get_position(struct timing_generator *tg, +void optc1_get_position(struct timing_generator *optc, struct crtc_position *position) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); REG_GET_2(OTG_STATUS_POSITION, OTG_HORZ_COUNT, &position->horizontal_count, @@ -586,12 +574,12 @@ static void tgn10_get_position(struct timing_generator *tg, OTG_VERT_COUNT_NOM, &position->nominal_vcount); } -static bool tgn10_is_counter_moving(struct timing_generator *tg) +bool optc1_is_counter_moving(struct timing_generator *optc) { struct crtc_position position1, position2; - tg->funcs->get_position(tg, &position1); - tg->funcs->get_position(tg, &position2); + optc->funcs->get_position(optc, &position1); + optc->funcs->get_position(optc, &position2); if (position1.horizontal_count == position2.horizontal_count && position1.vertical_count == position2.vertical_count) @@ -600,10 +588,10 @@ static bool tgn10_is_counter_moving(struct timing_generator *tg) return true; } -static bool tgn10_did_triggered_reset_occur( - struct timing_generator *tg) +bool optc1_did_triggered_reset_occur( + struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); uint32_t occurred_force, occurred_vsync; REG_GET(OTG_FORCE_COUNT_NOW_CNTL, @@ -615,9 +603,9 @@ static bool tgn10_did_triggered_reset_occur( return occurred_vsync != 0 || occurred_force != 0; } -static void tgn10_disable_reset_trigger(struct timing_generator *tg) +void optc1_disable_reset_trigger(struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); REG_WRITE(OTG_TRIGA_CNTL, 0); @@ -628,9 +616,9 @@ static void tgn10_disable_reset_trigger(struct timing_generator *tg) OTG_FORCE_VSYNC_NEXT_LINE_CLEAR, 1); } -static void tgn10_enable_reset_trigger(struct timing_generator *tg, int source_tg_inst) +void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); uint32_t falling_edge; REG_GET(OTG_V_SYNC_A_CNTL, @@ -662,12 +650,12 @@ static void tgn10_enable_reset_trigger(struct timing_generator *tg, int source_t OTG_FORCE_COUNT_NOW_MODE, 2); } -void tgn10_enable_crtc_reset( - struct timing_generator *tg, +void optc1_enable_crtc_reset( + struct timing_generator *optc, int source_tg_inst, struct crtc_trigger_info *crtc_tp) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); uint32_t falling_edge = 0; uint32_t rising_edge = 0; @@ -707,10 +695,10 @@ void tgn10_enable_crtc_reset( } } -static void tgn10_wait_for_state(struct timing_generator *tg, +void optc1_wait_for_state(struct timing_generator *optc, enum crtc_state state) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); switch (state) { case CRTC_STATE_VBLANK: @@ -730,8 +718,8 @@ static void tgn10_wait_for_state(struct timing_generator *tg, } } -static void tgn10_set_early_control( - struct timing_generator *tg, +void optc1_set_early_control( + struct timing_generator *optc, uint32_t early_cntl) { /* asic design change, do not need this control @@ -740,11 +728,11 @@ static void tgn10_set_early_control( } -static void tgn10_set_static_screen_control( - struct timing_generator *tg, +void optc1_set_static_screen_control( + struct timing_generator *optc, uint32_t value) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); /* Bit 8 is no longer applicable in RV for PSR case, * set bit 8 to 0 if given @@ -769,11 +757,11 @@ static void tgn10_set_static_screen_control( * ***************************************************************************** */ -static void tgn10_set_drr( - struct timing_generator *tg, +void optc1_set_drr( + struct timing_generator *optc, const struct drr_params *params) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); if (params != NULL && params->vertical_total_max > 0 && @@ -806,15 +794,15 @@ static void tgn10_set_drr( } } -static void tgn10_set_test_pattern( - struct timing_generator *tg, +static void optc1_set_test_pattern( + struct timing_generator *optc, /* TODO: replace 'controller_dp_test_pattern' by 'test_pattern_mode' * because this is not DP-specific (which is probably somewhere in DP * encoder) */ enum controller_dp_test_pattern test_pattern, enum dc_color_depth color_depth) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); enum test_pattern_color_format bit_depth; enum test_pattern_dyn_range dyn_range; enum test_pattern_mode mode; @@ -1065,35 +1053,30 @@ static void tgn10_set_test_pattern( } } -static void tgn10_get_crtc_scanoutpos( - struct timing_generator *tg, +void optc1_get_crtc_scanoutpos( + struct timing_generator *optc, uint32_t *v_blank_start, uint32_t *v_blank_end, uint32_t *h_position, uint32_t *v_position) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); struct crtc_position position; REG_GET_2(OTG_V_BLANK_START_END, OTG_V_BLANK_START, v_blank_start, OTG_V_BLANK_END, v_blank_end); - tgn10_get_position(tg, &position); + optc1_get_position(optc, &position); *h_position = position.horizontal_count; *v_position = position.vertical_count; } - - -static void tgn10_enable_stereo(struct timing_generator *tg, +static void optc1_enable_stereo(struct timing_generator *optc, const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); - - uint32_t active_width = timing->h_addressable; - uint32_t space1_size = timing->v_total - timing->v_addressable; + struct optc *optc1 = DCN10TG_FROM_TG(optc); if (flags) { uint32_t stereo_en; @@ -1121,29 +1104,23 @@ static void tgn10_enable_stereo(struct timing_generator *tg, OTG_3D_STRUCTURE_STEREO_SEL_OVR, flags->FRAME_PACKED); } - - REG_UPDATE(OPPBUF_CONTROL, - OPPBUF_ACTIVE_WIDTH, active_width); - - REG_UPDATE(OPPBUF_3D_PARAMETERS_0, - OPPBUF_3D_VACT_SPACE1_SIZE, space1_size); } -static void tgn10_program_stereo(struct timing_generator *tg, +void optc1_program_stereo(struct timing_generator *optc, const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags) { if (flags->PROGRAM_STEREO) - tgn10_enable_stereo(tg, timing, flags); + optc1_enable_stereo(optc, timing, flags); else - tgn10_disable_stereo(tg); + optc1_disable_stereo(optc); } -static bool tgn10_is_stereo_left_eye(struct timing_generator *tg) +bool optc1_is_stereo_left_eye(struct timing_generator *optc) { bool ret = false; uint32_t left_eye = 0; - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); REG_GET(OTG_STEREO_STATUS, OTG_STEREO_CURRENT_EYE, &left_eye); @@ -1155,7 +1132,7 @@ static bool tgn10_is_stereo_left_eye(struct timing_generator *tg) return ret; } -void tgn10_read_otg_state(struct dcn10_timing_generator *tgn10, +void optc1_read_otg_state(struct optc *optc1, struct dcn_otg_state *s) { REG_GET(OTG_CONTROL, @@ -1199,17 +1176,22 @@ void tgn10_read_otg_state(struct dcn10_timing_generator *tgn10, OPTC_UNDERFLOW_OCCURRED_STATUS, &s->underflow_occurred_status); } -static void tgn10_tg_init(struct timing_generator *tg) +static void optc1_clear_optc_underflow(struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); - tgn10_set_blank_data_double_buffer(tg, true); REG_UPDATE(OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, 1); } -static bool tgn10_is_tg_enabled(struct timing_generator *tg) +static void optc1_tg_init(struct timing_generator *optc) +{ + optc1_set_blank_data_double_buffer(optc, true); + optc1_clear_optc_underflow(optc); +} + +static bool optc1_is_tg_enabled(struct timing_generator *optc) { - struct dcn10_timing_generator *tgn10 = DCN10TG_FROM_TG(tg); + struct optc *optc1 = DCN10TG_FROM_TG(optc); uint32_t otg_enabled = 0; REG_GET(OTG_CONTROL, OTG_MASTER_EN, &otg_enabled); @@ -1217,50 +1199,65 @@ static bool tgn10_is_tg_enabled(struct timing_generator *tg) return (otg_enabled != 0); } + +static bool optc1_is_optc_underflow_occurred(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + uint32_t underflow_occurred = 0; + + REG_GET(OPTC_INPUT_GLOBAL_CONTROL, + OPTC_UNDERFLOW_OCCURRED_STATUS, + &underflow_occurred); + + return (underflow_occurred == 1); +} + static const struct timing_generator_funcs dcn10_tg_funcs = { - .validate_timing = tgn10_validate_timing, - .program_timing = tgn10_program_timing, - .program_global_sync = tgn10_program_global_sync, - .enable_crtc = tgn10_enable_crtc, - .disable_crtc = tgn10_disable_crtc, + .validate_timing = optc1_validate_timing, + .program_timing = optc1_program_timing, + .program_global_sync = optc1_program_global_sync, + .enable_crtc = optc1_enable_crtc, + .disable_crtc = optc1_disable_crtc, /* used by enable_timing_synchronization. Not need for FPGA */ - .is_counter_moving = tgn10_is_counter_moving, - .get_position = tgn10_get_position, - .get_frame_count = tgn10_get_vblank_counter, - .get_scanoutpos = tgn10_get_crtc_scanoutpos, - .set_early_control = tgn10_set_early_control, + .is_counter_moving = optc1_is_counter_moving, + .get_position = optc1_get_position, + .get_frame_count = optc1_get_vblank_counter, + .get_scanoutpos = optc1_get_crtc_scanoutpos, + .set_early_control = optc1_set_early_control, /* used by enable_timing_synchronization. Not need for FPGA */ - .wait_for_state = tgn10_wait_for_state, - .set_blank = tgn10_set_blank, - .is_blanked = tgn10_is_blanked, - .set_blank_color = tgn10_program_blank_color, - .did_triggered_reset_occur = tgn10_did_triggered_reset_occur, - .enable_reset_trigger = tgn10_enable_reset_trigger, - .enable_crtc_reset = tgn10_enable_crtc_reset, - .disable_reset_trigger = tgn10_disable_reset_trigger, - .lock = tgn10_lock, - .unlock = tgn10_unlock, - .enable_optc_clock = tgn10_enable_optc_clock, - .set_drr = tgn10_set_drr, - .set_static_screen_control = tgn10_set_static_screen_control, - .set_test_pattern = tgn10_set_test_pattern, - .program_stereo = tgn10_program_stereo, - .is_stereo_left_eye = tgn10_is_stereo_left_eye, - .set_blank_data_double_buffer = tgn10_set_blank_data_double_buffer, - .tg_init = tgn10_tg_init, - .is_tg_enabled = tgn10_is_tg_enabled, + .wait_for_state = optc1_wait_for_state, + .set_blank = optc1_set_blank, + .is_blanked = optc1_is_blanked, + .set_blank_color = optc1_program_blank_color, + .did_triggered_reset_occur = optc1_did_triggered_reset_occur, + .enable_reset_trigger = optc1_enable_reset_trigger, + .enable_crtc_reset = optc1_enable_crtc_reset, + .disable_reset_trigger = optc1_disable_reset_trigger, + .lock = optc1_lock, + .unlock = optc1_unlock, + .enable_optc_clock = optc1_enable_optc_clock, + .set_drr = optc1_set_drr, + .set_static_screen_control = optc1_set_static_screen_control, + .set_test_pattern = optc1_set_test_pattern, + .program_stereo = optc1_program_stereo, + .is_stereo_left_eye = optc1_is_stereo_left_eye, + .set_blank_data_double_buffer = optc1_set_blank_data_double_buffer, + .tg_init = optc1_tg_init, + .is_tg_enabled = optc1_is_tg_enabled, + .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred, + .clear_optc_underflow = optc1_clear_optc_underflow, }; -void dcn10_timing_generator_init(struct dcn10_timing_generator *tgn10) +void dcn10_timing_generator_init(struct optc *optc1) { - tgn10->base.funcs = &dcn10_tg_funcs; + optc1->base.funcs = &dcn10_tg_funcs; - tgn10->max_h_total = tgn10->tg_mask->OTG_H_TOTAL + 1; - tgn10->max_v_total = tgn10->tg_mask->OTG_V_TOTAL + 1; + optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1; + optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1; - tgn10->min_h_blank = 32; - tgn10->min_v_blank = 3; - tgn10->min_v_blank_interlace = 5; - tgn10->min_h_sync_width = 8; - tgn10->min_v_sync_width = 1; + optc1->min_h_blank = 32; + optc1->min_v_blank = 3; + optc1->min_v_blank_interlace = 5; + optc1->min_h_sync_width = 8; + optc1->min_v_sync_width = 1; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h similarity index 83% rename from drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.h rename to drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index bb1cbfdc355476583ed2733a15861b3f6c4c674d..a3c7c2012f057f431f5ce8e9a4d24942f3dc4d61 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -29,7 +29,7 @@ #include "timing_generator.h" #define DCN10TG_FROM_TG(tg)\ - container_of(tg, struct dcn10_timing_generator, base) + container_of(tg, struct optc, base) #define TG_COMMON_REG_LIST_DCN(inst) \ SRI(OTG_VSTARTUP_PARAM, OTG, inst),\ @@ -70,8 +70,6 @@ SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\ SRI(OPTC_DATA_SOURCE_SELECT, ODM, inst),\ SRI(OPTC_INPUT_GLOBAL_CONTROL, ODM, inst),\ - SRI(OPPBUF_CONTROL, OPPBUF, inst),\ - SRI(OPPBUF_3D_PARAMETERS_0, OPPBUF, inst),\ SRI(CONTROL, VTG, inst),\ SRI(OTG_VERT_SYNC_CONTROL, OTG, inst),\ SRI(OTG_MASTER_UPDATE_MODE, OTG, inst),\ @@ -84,7 +82,7 @@ SRI(OTG_TEST_PATTERN_COLOR, OTG, inst) -struct dcn_tg_registers { +struct dcn_optc_registers { uint32_t OTG_VERT_SYNC_CONTROL; uint32_t OTG_MASTER_UPDATE_MODE; uint32_t OTG_GSL_CONTROL; @@ -129,9 +127,11 @@ struct dcn_tg_registers { uint32_t OPTC_INPUT_CLOCK_CONTROL; uint32_t OPTC_DATA_SOURCE_SELECT; uint32_t OPTC_INPUT_GLOBAL_CONTROL; - uint32_t OPPBUF_CONTROL; - uint32_t OPPBUF_3D_PARAMETERS_0; uint32_t CONTROL; + uint32_t OTG_GSL_WINDOW_X; + uint32_t OTG_GSL_WINDOW_Y; + uint32_t OTG_VUPDATE_KEEPOUT; + uint32_t OTG_DSC_START_POSITION; }; #define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\ @@ -211,8 +211,6 @@ struct dcn_tg_registers { SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_GATE_DIS, mask_sh),\ SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_OCCURRED_STATUS, mask_sh),\ SF(ODM0_OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, mask_sh),\ - SF(OPPBUF0_OPPBUF_CONTROL, OPPBUF_ACTIVE_WIDTH, mask_sh),\ - SF(OPPBUF0_OPPBUF_3D_PARAMETERS_0, OPPBUF_3D_VACT_SPACE1_SIZE, mask_sh),\ SF(VTG0_CONTROL, VTG0_ENABLE, mask_sh),\ SF(VTG0_CONTROL, VTG0_FP2, mask_sh),\ SF(VTG0_CONTROL, VTG0_VCOUNT_INIT, mask_sh),\ @@ -332,8 +330,6 @@ struct dcn_tg_registers { type OPTC_SEG0_SRC_SEL;\ type OPTC_UNDERFLOW_OCCURRED_STATUS;\ type OPTC_UNDERFLOW_CLEAR;\ - type OPPBUF_ACTIVE_WIDTH;\ - type OPPBUF_3D_VACT_SPACE1_SIZE;\ type VTG0_ENABLE;\ type VTG0_FP2;\ type VTG0_VCOUNT_INIT;\ @@ -346,22 +342,35 @@ struct dcn_tg_registers { type OTG_GSL2_EN;\ type OTG_GSL_MASTER_EN;\ type OTG_GSL_FORCE_DELAY;\ - type OTG_GSL_CHECK_ALL_FIELDS; + type OTG_GSL_CHECK_ALL_FIELDS;\ + type OTG_GSL_WINDOW_START_X;\ + type OTG_GSL_WINDOW_END_X;\ + type OTG_GSL_WINDOW_START_Y;\ + type OTG_GSL_WINDOW_END_Y;\ + type OTG_RANGE_TIMING_DBUF_UPDATE_MODE;\ + type OTG_GSL_MASTER_MODE;\ + type OTG_MASTER_UPDATE_LOCK_GSL_EN;\ + type MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET;\ + type MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET;\ + type OTG_DSC_START_POSITION_X;\ + type OTG_DSC_START_POSITION_LINE_NUM;\ + type OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN; -struct dcn_tg_shift { + +struct dcn_optc_shift { TG_REG_FIELD_LIST(uint8_t) }; -struct dcn_tg_mask { +struct dcn_optc_mask { TG_REG_FIELD_LIST(uint32_t) }; -struct dcn10_timing_generator { +struct optc { struct timing_generator base; - const struct dcn_tg_registers *tg_regs; - const struct dcn_tg_shift *tg_shift; - const struct dcn_tg_mask *tg_mask; + const struct dcn_optc_registers *tg_regs; + const struct dcn_optc_shift *tg_shift; + const struct dcn_optc_mask *tg_mask; enum controller_id controller_id; @@ -376,7 +385,7 @@ struct dcn10_timing_generator { uint32_t min_v_blank_interlace; }; -void dcn10_timing_generator_init(struct dcn10_timing_generator *tg); +void dcn10_timing_generator_init(struct optc *optc); struct dcn_otg_state { uint32_t v_blank_start; @@ -397,7 +406,77 @@ struct dcn_otg_state { uint32_t otg_enabled; }; -void tgn10_read_otg_state(struct dcn10_timing_generator *tgn10, +void optc1_read_otg_state(struct optc *optc1, struct dcn_otg_state *s); +bool optc1_validate_timing( + struct timing_generator *optc, + const struct dc_crtc_timing *timing); + +void optc1_program_timing( + struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing, + bool use_vbios); + +void optc1_program_global_sync( + struct timing_generator *optc); + +bool optc1_disable_crtc(struct timing_generator *optc); + +bool optc1_is_counter_moving(struct timing_generator *optc); + +void optc1_get_position(struct timing_generator *optc, + struct crtc_position *position); + +uint32_t optc1_get_vblank_counter(struct timing_generator *optc); + +void optc1_get_crtc_scanoutpos( + struct timing_generator *optc, + uint32_t *v_blank_start, + uint32_t *v_blank_end, + uint32_t *h_position, + uint32_t *v_position); + +void optc1_set_early_control( + struct timing_generator *optc, + uint32_t early_cntl); + +void optc1_wait_for_state(struct timing_generator *optc, + enum crtc_state state); + +void optc1_set_blank(struct timing_generator *optc, + bool enable_blanking); + +bool optc1_is_blanked(struct timing_generator *optc); + +void optc1_program_blank_color( + struct timing_generator *optc, + const struct tg_color *black_color); + +bool optc1_did_triggered_reset_occur( + struct timing_generator *optc); + +void optc1_enable_reset_trigger(struct timing_generator *optc, int source_tg_inst); + +void optc1_disable_reset_trigger(struct timing_generator *optc); + +void optc1_lock(struct timing_generator *optc); + +void optc1_unlock(struct timing_generator *optc); + +void optc1_enable_optc_clock(struct timing_generator *optc, bool enable); + +void optc1_set_drr( + struct timing_generator *optc, + const struct drr_params *params); + +void optc1_set_static_screen_control( + struct timing_generator *optc, + uint32_t value); + +void optc1_program_stereo(struct timing_generator *optc, + const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags); + +bool optc1_is_stereo_left_eye(struct timing_generator *optc); + #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 10cce51d31d2a08b87e5e7675d4d8a4664d802fc..44825e2c9ebb5fb3247c86c7ab0213e3984a13c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -34,7 +34,7 @@ #include "dcn10/dcn10_mpc.h" #include "irq/dcn10/irq_service_dcn10.h" #include "dcn10/dcn10_dpp.h" -#include "dcn10/dcn10_timing_generator.h" +#include "dcn10_optc.h" #include "dcn10/dcn10_hw_sequencer.h" #include "dce110/dce110_hw_sequencer.h" #include "dcn10/dcn10_opp.h" @@ -348,18 +348,18 @@ static const struct dcn_mpc_mask mpc_mask = { #define tg_regs(id)\ [id] = {TG_COMMON_REG_LIST_DCN1_0(id)} -static const struct dcn_tg_registers tg_regs[] = { +static const struct dcn_optc_registers tg_regs[] = { tg_regs(0), tg_regs(1), tg_regs(2), tg_regs(3), }; -static const struct dcn_tg_shift tg_shift = { +static const struct dcn_optc_shift tg_shift = { TG_COMMON_MASK_SH_LIST_DCN1_0(__SHIFT) }; -static const struct dcn_tg_mask tg_mask = { +static const struct dcn_optc_mask tg_mask = { TG_COMMON_MASK_SH_LIST_DCN1_0(_MASK) }; @@ -553,8 +553,8 @@ static struct timing_generator *dcn10_timing_generator_create( struct dc_context *ctx, uint32_t instance) { - struct dcn10_timing_generator *tgn10 = - kzalloc(sizeof(struct dcn10_timing_generator), GFP_KERNEL); + struct optc *tgn10 = + kzalloc(sizeof(struct optc), GFP_KERNEL); if (!tgn10) return NULL; @@ -678,6 +678,7 @@ static struct dce_hwseq *dcn10_hwseq_create( hws->shifts = &hwseq_shift; hws->masks = &hwseq_mask; hws->wa.DEGVIDCN10_253 = true; + hws->wa.false_optc_underflow = true; } return hws; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 87bab8e8139fbb05e960a88d989455ef48371ae3..3488af2b5786cd0a299df91da12b8e4b2ed3f843 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'utils' sub-component of DAL. # It provides the general basic services required by other DAL # subcomponents. diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c index 4c31fa54af3902509707c9e75d33d80fa7e47402..c109b2c34c8fc718f1bc88ddd3871d94fce474f7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c @@ -35,35 +35,6 @@ static void set_soc_bounding_box(struct _vcs_dpi_soc_bounding_box_st *soc, enum soc->writeback_latency_us = 12.0; soc->ideal_dram_bw_after_urgent_percent = 80.0; soc->max_request_size_bytes = 256; - - soc->vmin.dcfclk_mhz = 300.0; - soc->vmin.dispclk_mhz = 608.0; - soc->vmin.dppclk_mhz = 435.0; - soc->vmin.dram_bw_per_chan_gbps = 12.8; - soc->vmin.phyclk_mhz = 540.0; - soc->vmin.socclk_mhz = 208.0; - - soc->vmid.dcfclk_mhz = 600.0; - soc->vmid.dispclk_mhz = 661.0; - soc->vmid.dppclk_mhz = 661.0; - soc->vmid.dram_bw_per_chan_gbps = 12.8; - soc->vmid.phyclk_mhz = 540.0; - soc->vmid.socclk_mhz = 208.0; - - soc->vnom.dcfclk_mhz = 600.0; - soc->vnom.dispclk_mhz = 661.0; - soc->vnom.dppclk_mhz = 661.0; - soc->vnom.dram_bw_per_chan_gbps = 38.4; - soc->vnom.phyclk_mhz = 810; - soc->vnom.socclk_mhz = 208.0; - - soc->vmax.dcfclk_mhz = 600.0; - soc->vmax.dispclk_mhz = 1086.0; - soc->vmax.dppclk_mhz = 661.0; - soc->vmax.dram_bw_per_chan_gbps = 38.4; - soc->vmax.phyclk_mhz = 810.0; - soc->vmax.socclk_mhz = 208.0; - soc->downspread_percent = 0.5; soc->dram_page_open_time_ns = 50.0; soc->dram_rw_turnaround_time_ns = 17.5; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 2d9d6298f0d320da80c601425e6c287e6d8f1e30..aeebd8bee6289e1432ed37ca02ccc2f3cc764946 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -79,10 +79,6 @@ struct _vcs_dpi_soc_bounding_box_st { double writeback_latency_us; double ideal_dram_bw_after_urgent_percent; unsigned int max_request_size_bytes; - struct _vcs_dpi_voltage_scaling_st vmin; - struct _vcs_dpi_voltage_scaling_st vmid; - struct _vcs_dpi_voltage_scaling_st vnom; - struct _vcs_dpi_voltage_scaling_st vmax; double downspread_percent; double dram_page_open_time_ns; double dram_rw_turnaround_time_ns; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 1f337ecfeab049d9241f65a1b1d2c5b80b95cc76..260e113fcc02682f8e731e2c0ec0ed621b77e0b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -28,6 +28,15 @@ #include "dml_inline_defs.h" +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + #define BPP_INVALID 0 #define BPP_BLENDED_PIPE 0xffffffff static const unsigned int NumberOfStates = DC__VOLTAGE_STATES; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.c index 8ba962df42e6057fb51a4d6cb7de299676955668..325dd2b757d6b1d4228acb1c3f73c2c2002d8e80 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_calc.c @@ -27,6 +27,15 @@ #include "display_mode_vba.h" #include "display_rq_dlg_calc.h" +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + static void calculate_ttu_cursor(struct display_mode_lib *mode_lib, double *refcyc_per_req_delivery_pre_cur, double *refcyc_per_req_delivery_cur, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index 1e4b1e38340123708b9b009f6b3d580eb3e2bb8e..c2037daa8e6666d7b2dd9b76477e03f7472e9220 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -28,6 +28,15 @@ #include "dml_inline_defs.h" +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + static unsigned int get_bytes_per_element(enum source_format_class source_format, bool is_chroma) { unsigned int ret_val = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/soc_bounding_box.c b/drivers/gpu/drm/amd/display/dc/dml/soc_bounding_box.c index bc7d8c7072211c94ba10aec24784c157388ce0c7..324239c779583b26c4d781f8673daa81b1f02b42 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/soc_bounding_box.c +++ b/drivers/gpu/drm/amd/display/dc/dml/soc_bounding_box.c @@ -27,6 +27,16 @@ #include "dc_features.h" #include "dml_inline_defs.h" + +/* + * NOTE: + * This file is gcc-parseable HW gospel, coming straight from HW engineers. + * + * It doesn't adhere to Linux kernel style and sometimes will do things in odd + * ways. Unless there is something clearly wrong with it the code should + * remain as-is as it provides us with a guarantee from HW that it is correct. + */ + void dml_socbb_set_latencies(soc_bounding_box_st *to_box, soc_bounding_box_st *from_box) { to_box->dram_clock_change_latency_us = from_box->dram_clock_change_latency_us; diff --git a/drivers/gpu/drm/amd/display/dc/gpio/Makefile b/drivers/gpu/drm/amd/display/dc/gpio/Makefile index 70d01a9e96760ac2828dd5537cefe63e53ac3427..562ee189d780c4de20d150a16497c27ef44300a8 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/Makefile +++ b/drivers/gpu/drm/amd/display/dc/gpio/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'gpio' sub-component of DAL. # It provides the control and status of HW GPIO pins. diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/Makefile b/drivers/gpu/drm/amd/display/dc/i2caux/Makefile index 55603400acd99128c0957ebe7ee19d610283003f..352885cb4d0763dd3bb4912e59722abd9ee4589c 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/Makefile +++ b/drivers/gpu/drm/amd/display/dc/i2caux/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'i2c' sub-component of DAL. # It provides the control and status of HW i2c engine of the adapter. diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index d680b565af6fa529bac934ac86f5e58768f60413..d6971054ec07e93f21637701ee81486ea0973387 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -212,7 +212,6 @@ struct pipe_ctx { struct _vcs_dpi_display_rq_regs_st rq_regs; struct _vcs_dpi_display_pipe_dest_params_st pipe_dlg_param; #endif - struct dwbc *dwbc; }; struct resource_context { @@ -241,6 +240,7 @@ struct dce_bw_output { struct dcn_bw_clocks { int dispclk_khz; + int dppclk_khz; bool dppclk_div; int dcfclk_khz; int dcfclk_deep_sleep_khz; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h index 1e231f6de73247212914e126312966b9f4588c65..132d18d4b29383c10977d3fc13096b87204dd3b8 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h @@ -349,10 +349,10 @@ struct dcn_bw_internal_vars { float dst_x_after_scaler; float dst_y_after_scaler; float time_calc; - float v_update_offset[number_of_planes_minus_one + 1]; + float v_update_offset[number_of_planes_minus_one + 1][2]; float total_repeater_delay; - float v_update_width[number_of_planes_minus_one + 1]; - float v_ready_offset[number_of_planes_minus_one + 1]; + float v_update_width[number_of_planes_minus_one + 1][2]; + float v_ready_offset[number_of_planes_minus_one + 1][2]; float time_setup; float extra_latency; float maximum_vstartup; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h index 48217ecfabd418bbcb75bdd6f2504babd95a5746..a83a484946133d00311c8f1219ba2bcf17527805 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h @@ -50,9 +50,9 @@ struct abm_funcs { bool (*set_backlight_level)(struct abm *abm, unsigned int backlight_level, unsigned int frame_ramp, - unsigned int controller_id); + unsigned int controller_id, + bool use_smooth_brightness); unsigned int (*get_current_backlight_8_bit)(struct abm *abm); - bool (*is_dmcu_initialized)(struct abm *abm); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h index b59712b41b8139ee19828c366ce4581569c84185..ce206355461b1e4496b80f76dd5e42a1ad38a11f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h @@ -63,6 +63,7 @@ struct dmcu_funcs { unsigned int wait_loop_number); void (*get_psr_wait_loop)(struct dmcu *dmcu, unsigned int *psr_wait_loop_number); + bool (*is_dmcu_initialized)(struct dmcu *dmcu); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h index ccb4896975c2430c9c00cab50ced81250b3d2ab4..25edbde6163eed65206e07b2d4e594b47210af4c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h @@ -68,7 +68,7 @@ struct dpp_funcs { void (*dpp_set_csc_adjustment)( struct dpp *dpp, - const struct out_csc_color_matrix *tbl_entry); + const uint16_t *regval); void (*dpp_power_on_regamma_lut)( struct dpp *dpp, @@ -122,7 +122,7 @@ struct dpp_funcs { void (*set_cursor_attributes)( struct dpp *dpp_base, - const struct dc_cursor_attributes *attr); + enum dc_cursor_color_format color_format); void (*set_cursor_position)( struct dpp *dpp_base, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 49b12f602e79c4f58cb338f4bc472a779480995b..b7c7e70022e43c3a3ca0feb35ac793de0cea1c3a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -28,6 +28,20 @@ #include "mem_input.h" + +enum cursor_pitch { + CURSOR_PITCH_64_PIXELS = 0, + CURSOR_PITCH_128_PIXELS, + CURSOR_PITCH_256_PIXELS +}; + +enum cursor_lines_per_chunk { + CURSOR_LINE_PER_CHUNK_2 = 1, + CURSOR_LINE_PER_CHUNK_4, + CURSOR_LINE_PER_CHUNK_8, + CURSOR_LINE_PER_CHUNK_16 +}; + struct hubp { struct hubp_funcs *funcs; struct dc_context *ctx; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index ddc56700109b36e25280ab5cc90665a21cf03ad1..e3f0b4056318f0bc61de80754db0d6fb6d755721 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -126,36 +126,12 @@ struct default_adjustment { bool force_hw_default; }; -struct out_csc_color_matrix { - enum dc_color_space color_space; - uint16_t regval[12]; -}; -struct output_csc_matrix { +struct out_csc_color_matrix { enum dc_color_space color_space; uint16_t regval[12]; }; -static const struct output_csc_matrix output_csc_matrix[] = { - { COLOR_SPACE_SRGB, - { 0x2000, 0, 0, 0, 0, 0x2000, 0, 0, 0, 0, 0x2000, 0} }, - { COLOR_SPACE_SRGB_LIMITED, - { 0x1B67, 0, 0, 0x201, 0, 0x1B67, 0, 0x201, 0, 0, 0x1B67, 0x201} }, - { COLOR_SPACE_YCBCR601, - { 0xE04, 0xF444, 0xFDB9, 0x1004, 0x831, 0x1016, 0x320, 0x201, 0xFB45, - 0xF6B7, 0xE04, 0x1004} }, - { COLOR_SPACE_YCBCR709, - { 0xE04, 0xF345, 0xFEB7, 0x1004, 0x5D3, 0x1399, 0x1FA, - 0x201, 0xFCCA, 0xF533, 0xE04, 0x1004} }, - - /* TODO: correct values below */ - { COLOR_SPACE_YCBCR601_LIMITED, - { 0xE00, 0xF447, 0xFDB9, 0x1000, 0x991, - 0x12C9, 0x3A6, 0x200, 0xFB47, 0xF6B9, 0xE00, 0x1000} }, - { COLOR_SPACE_YCBCR709_LIMITED, - { 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3, - 0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} }, -}; enum opp_regamma { OPP_REGAMMA_BYPASS = 0, @@ -178,4 +154,41 @@ struct dc_bias_and_scale { uint16_t bias_blue; }; +enum test_pattern_dyn_range { + TEST_PATTERN_DYN_RANGE_VESA = 0, + TEST_PATTERN_DYN_RANGE_CEA +}; + +enum test_pattern_mode { + TEST_PATTERN_MODE_COLORSQUARES_RGB = 0, + TEST_PATTERN_MODE_COLORSQUARES_YCBCR601, + TEST_PATTERN_MODE_COLORSQUARES_YCBCR709, + TEST_PATTERN_MODE_VERTICALBARS, + TEST_PATTERN_MODE_HORIZONTALBARS, + TEST_PATTERN_MODE_SINGLERAMP_RGB, + TEST_PATTERN_MODE_DUALRAMP_RGB +}; + +enum test_pattern_color_format { + TEST_PATTERN_COLOR_FORMAT_BPC_6 = 0, + TEST_PATTERN_COLOR_FORMAT_BPC_8, + TEST_PATTERN_COLOR_FORMAT_BPC_10, + TEST_PATTERN_COLOR_FORMAT_BPC_12 +}; + +enum controller_dp_test_pattern { + CONTROLLER_DP_TEST_PATTERN_D102 = 0, + CONTROLLER_DP_TEST_PATTERN_SYMBOLERROR, + CONTROLLER_DP_TEST_PATTERN_PRBS7, + CONTROLLER_DP_TEST_PATTERN_COLORSQUARES, + CONTROLLER_DP_TEST_PATTERN_VERTICALBARS, + CONTROLLER_DP_TEST_PATTERN_HORIZONTALBARS, + CONTROLLER_DP_TEST_PATTERN_COLORRAMP, + CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, + CONTROLLER_DP_TEST_PATTERN_RESERVED_8, + CONTROLLER_DP_TEST_PATTERN_RESERVED_9, + CONTROLLER_DP_TEST_PATTERN_RESERVED_A, + CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA +}; + #endif /* __DAL_HW_SHARED_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index 8a08f0a97f94d35f6ef996f0dc1d9f822eaa8697..0fd329deacd8a047c6302e092075972698191fff 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -1,3 +1,25 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ /* * link_encoder.h * diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 72ea33526a5c8851d6e34d6ca89c62da1b5c7cc1..23a8d5e53a89d695136a26c3144529752f17a0f0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -26,7 +26,10 @@ #define __DC_MPCC_H__ #include "dc_hw_types.h" -#include "opp.h" +#include "hw_shared.h" + +#define MAX_MPCC 6 +#define MAX_OPP 6 enum mpc_output_csc_mode { MPC_OUTPUT_CSC_DISABLE = 0, @@ -34,45 +37,151 @@ enum mpc_output_csc_mode { MPC_OUTPUT_CSC_COEF_B }; -struct mpcc_cfg { - int dpp_id; - int opp_id; - struct mpc_tree_cfg *tree_cfg; - unsigned int z_index; - struct tg_color black_color; - bool per_pixel_alpha; - bool pre_multiplied_alpha; +enum mpcc_blend_mode { + MPCC_BLEND_MODE_BYPASS, + MPCC_BLEND_MODE_TOP_LAYER_PASSTHROUGH, + MPCC_BLEND_MODE_TOP_LAYER_ONLY, + MPCC_BLEND_MODE_TOP_BOT_BLENDING +}; + +enum mpcc_alpha_blend_mode { + MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA, + MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN, + MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA +}; + +/* + * MPCC blending configuration + */ +struct mpcc_blnd_cfg { + struct tg_color black_color; /* background color */ + enum mpcc_alpha_blend_mode alpha_mode; /* alpha blend mode */ + bool pre_multiplied_alpha; /* alpha pre-multiplied mode flag */ + int global_gain; + int global_alpha; + bool overlap_only; + +}; + +struct mpcc_sm_cfg { + bool enable; + /* 0-single plane,2-row subsampling,4-column subsampling,6-checkboard subsampling */ + int sm_mode; + /* 0- disable frame alternate, 1- enable frame alternate */ + bool frame_alt; + /* 0- disable field alternate, 1- enable field alternate */ + bool field_alt; + /* 0-no force,2-force frame polarity from top,3-force frame polarity from bottom */ + int force_next_frame_porlarity; + /* 0-no force,2-force field polarity from top,3-force field polarity from bottom */ + int force_next_field_polarity; +}; + +/* + * MPCC connection and blending configuration for a single MPCC instance. + * This struct is used as a node in an MPC tree. + */ +struct mpcc { + int mpcc_id; /* MPCC physical instance */ + int dpp_id; /* DPP input to this MPCC */ + struct mpcc *mpcc_bot; /* pointer to bottom layer MPCC. NULL when not connected */ + struct mpcc_blnd_cfg blnd_cfg; /* The blending configuration for this MPCC */ + struct mpcc_sm_cfg sm_cfg; /* stereo mix setting for this MPCC */ +}; + +/* + * MPC tree represents all MPCC connections for a pipe. + */ +struct mpc_tree { + int opp_id; /* The OPP instance that owns this MPC tree */ + struct mpcc *opp_list; /* The top MPCC layer of the MPC tree that outputs to OPP endpoint */ }; struct mpc { const struct mpc_funcs *funcs; struct dc_context *ctx; + + struct mpcc mpcc_array[MAX_MPCC]; }; struct mpc_funcs { - int (*add)(struct mpc *mpc, struct mpcc_cfg *cfg); + /* + * Insert DPP into MPC tree based on specified blending position. + * Only used for planes that are part of blending chain for OPP output + * + * Parameters: + * [in/out] mpc - MPC context. + * [in/out] tree - MPC tree structure that plane will be added to. + * [in] blnd_cfg - MPCC blending configuration for the new blending layer. + * [in] sm_cfg - MPCC stereo mix configuration for the new blending layer. + * stereo mix must disable for the very bottom layer of the tree config. + * [in] insert_above_mpcc - Insert new plane above this MPCC. If NULL, insert as bottom plane. + * [in] dpp_id - DPP instance for the plane to be added. + * [in] mpcc_id - The MPCC physical instance to use for blending. + * + * Return: struct mpcc* - MPCC that was added. + */ + struct mpcc* (*insert_plane)( + struct mpc *mpc, + struct mpc_tree *tree, + struct mpcc_blnd_cfg *blnd_cfg, + struct mpcc_sm_cfg *sm_cfg, + struct mpcc *insert_above_mpcc, + int dpp_id, + int mpcc_id); - void (*remove)(struct mpc *mpc, - struct mpc_tree_cfg *tree_cfg, - int opp_id, - int mpcc_inst); + /* + * Remove a specified MPCC from the MPC tree. + * + * Parameters: + * [in/out] mpc - MPC context. + * [in/out] tree - MPC tree structure that plane will be removed from. + * [in/out] mpcc - MPCC to be removed from tree. + * + * Return: void + */ + void (*remove_mpcc)( + struct mpc *mpc, + struct mpc_tree *tree, + struct mpcc *mpcc); - void (*wait_for_idle)(struct mpc *mpc, int id); + /* + * Reset the MPCC HW status by disconnecting all muxes. + * + * Parameters: + * [in/out] mpc - MPC context. + * + * Return: void + */ + void (*mpc_init)(struct mpc *mpc); - void (*update_blend_mode)(struct mpc *mpc, struct mpcc_cfg *cfg); + /* + * Update the blending configuration for a specified MPCC. + * + * Parameters: + * [in/out] mpc - MPC context. + * [in] blnd_cfg - MPCC blending configuration. + * [in] mpcc_id - The MPCC physical instance. + * + * Return: void + */ + void (*update_blending)( + struct mpc *mpc, + struct mpcc_blnd_cfg *blnd_cfg, + int mpcc_id); - int (*get_opp_id)(struct mpc *mpc, int mpcc_id); + struct mpcc* (*get_mpcc_for_dpp)( + struct mpc_tree *tree, + int dpp_id); + + void (*wait_for_idle)(struct mpc *mpc, int id); - void (*set_output_csc)(struct mpc *mpc, - int opp_id, - const struct out_csc_color_matrix *tbl_entry, - enum mpc_output_csc_mode ocsc_mode); + void (*assert_mpcc_idle_before_connect)(struct mpc *mpc, int mpcc_id); - void (*set_ocsc_default)(struct mpc *mpc, - int opp_id, - enum dc_color_space color_space, - enum mpc_output_csc_mode ocsc_mode); + void (*init_mpcc_list_from_hw)( + struct mpc *mpc, + struct mpc_tree *tree); }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h index 579d1059a3d467aebf79215b789b915cd47d5ec8..ab8fb77f1ae5d36d01c263694860d4d681579239 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h @@ -29,6 +29,7 @@ #include "hw_shared.h" #include "dc_hw_types.h" #include "transform.h" +#include "mpc.h" struct fixed31_32; @@ -204,7 +205,7 @@ struct output_pixel_processor { struct dc_context *ctx; uint32_t inst; struct pwl_params regamma_params; - struct mpc_tree_cfg mpc_tree; + struct mpc_tree mpc_tree_params; bool mpcc_disconnect_pending[MAX_PIPES]; const struct opp_funcs *funcs; }; @@ -248,6 +249,21 @@ enum ovl_csc_adjust_item { OVERLAY_COLOR_TEMPERATURE }; +enum oppbuf_display_segmentation { + OPPBUF_DISPLAY_SEGMENTATION_1_SEGMENT = 0, + OPPBUF_DISPLAY_SEGMENTATION_2_SEGMENT = 1, + OPPBUF_DISPLAY_SEGMENTATION_4_SEGMENT = 2, + OPPBUF_DISPLAY_SEGMENTATION_4_SEGMENT_SPLIT_LEFT = 3, + OPPBUF_DISPLAY_SEGMENTATION_4_SEGMENT_SPLIT_RIGHT = 4 +}; + +struct oppbuf_params { + uint32_t active_width; + enum oppbuf_display_segmentation mso_segmentation; + uint32_t mso_overlap_pixel_num; + uint32_t pixel_repetition; +}; + struct opp_funcs { @@ -276,26 +292,11 @@ struct opp_funcs { void (*opp_destroy)(struct output_pixel_processor **opp); - void (*opp_set_stereo_polarity)( - struct output_pixel_processor *opp, - bool enable, - bool rightEyePolarity); - - void (*opp_set_test_pattern)( - struct output_pixel_processor *opp, - bool enable); - - void (*opp_dpg_blank_enable)( - struct output_pixel_processor *opp, - bool enable, - const struct tg_color *color, - int width, - int height); - - void (*opp_convert_pti)( + void (*opp_program_stereo)( struct output_pixel_processor *opp, bool enable, - bool polarity); + const struct dc_crtc_timing *timing); + }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 3050afe8e8a96c62c8a7eb26ac70e8ed21c7fcbc..b5db1692393c9e5769f43848cc60cc4317691bde 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -1,3 +1,25 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ /* * stream_encoder.h * diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 860259913d7863336686d2ac1f2e6de17eee79df..ec312f1a3e55e97dc6a5f1f3a4c49718bbd1e2ee 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -26,6 +26,8 @@ #ifndef __DAL_TIMING_GENERATOR_TYPES_H__ #define __DAL_TIMING_GENERATOR_TYPES_H__ +#include "hw_shared.h" + struct dc_bios; /* Contains CRTC vertical/horizontal pixel counters */ @@ -40,6 +42,19 @@ struct dcp_gsl_params { int gsl_master; }; +struct gsl_params { + int gsl0_en; + int gsl1_en; + int gsl2_en; + int gsl_master_en; + int gsl_master_mode; + int master_update_lock_gsl_en; + int gsl_window_start_x; + int gsl_window_end_x; + int gsl_window_start_y; + int gsl_window_end_y; +}; + /* define the structure of Dynamic Refresh Mode */ struct drr_params { uint32_t vertical_total_min; @@ -50,43 +65,6 @@ struct drr_params { #define LEFT_EYE_3D_PRIMARY_SURFACE 1 #define RIGHT_EYE_3D_PRIMARY_SURFACE 0 -enum test_pattern_dyn_range { - TEST_PATTERN_DYN_RANGE_VESA = 0, - TEST_PATTERN_DYN_RANGE_CEA -}; - -enum test_pattern_mode { - TEST_PATTERN_MODE_COLORSQUARES_RGB = 0, - TEST_PATTERN_MODE_COLORSQUARES_YCBCR601, - TEST_PATTERN_MODE_COLORSQUARES_YCBCR709, - TEST_PATTERN_MODE_VERTICALBARS, - TEST_PATTERN_MODE_HORIZONTALBARS, - TEST_PATTERN_MODE_SINGLERAMP_RGB, - TEST_PATTERN_MODE_DUALRAMP_RGB -}; - -enum test_pattern_color_format { - TEST_PATTERN_COLOR_FORMAT_BPC_6 = 0, - TEST_PATTERN_COLOR_FORMAT_BPC_8, - TEST_PATTERN_COLOR_FORMAT_BPC_10, - TEST_PATTERN_COLOR_FORMAT_BPC_12 -}; - -enum controller_dp_test_pattern { - CONTROLLER_DP_TEST_PATTERN_D102 = 0, - CONTROLLER_DP_TEST_PATTERN_SYMBOLERROR, - CONTROLLER_DP_TEST_PATTERN_PRBS7, - CONTROLLER_DP_TEST_PATTERN_COLORSQUARES, - CONTROLLER_DP_TEST_PATTERN_VERTICALBARS, - CONTROLLER_DP_TEST_PATTERN_HORIZONTALBARS, - CONTROLLER_DP_TEST_PATTERN_COLORRAMP, - CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, - CONTROLLER_DP_TEST_PATTERN_RESERVED_8, - CONTROLLER_DP_TEST_PATTERN_RESERVED_9, - CONTROLLER_DP_TEST_PATTERN_RESERVED_A, - CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA -}; - enum crtc_state { CRTC_STATE_VBLANK = 0, CRTC_STATE_VACTIVE @@ -100,6 +78,12 @@ struct _dlg_otg_param { enum signal_type signal; }; +struct vupdate_keepout_params { + int start_offset; + int end_offset; + int enable; +}; + struct crtc_stereo_flags { uint8_t PROGRAM_STEREO : 1; uint8_t PROGRAM_POLARITY : 1; @@ -187,6 +171,8 @@ struct timing_generator_funcs { void (*tg_init)(struct timing_generator *tg); bool (*is_tg_enabled)(struct timing_generator *tg); + bool (*is_optc_underflow_occurred)(struct timing_generator *tg); + void (*clear_optc_underflow)(struct timing_generator *tg); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 5dc4ecf618ff4afe82f8c0e499b08472dc404fcb..4c0aa56f7bae255e42b18db495efba17b9868971 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -28,6 +28,7 @@ #include "dc_types.h" #include "clock_source.h" #include "inc/hw/timing_generator.h" +#include "inc/hw/opp.h" #include "inc/hw/link_encoder.h" #include "core_status.h" @@ -40,6 +41,7 @@ enum pipe_gating_control { struct dce_hwseq_wa { bool blnd_crtc_trigger; bool DEGVIDCN10_253; + bool false_optc_underflow; }; struct hwseq_wa_state { @@ -137,10 +139,6 @@ struct hw_sequencer_funcs { void (*disable_plane)(struct dc *dc, struct pipe_ctx *pipe_ctx); - void (*enable_plane)(struct dc *dc, - struct pipe_ctx *pipe, - struct dc_state *context); - void (*update_info_frame)(struct pipe_ctx *pipe_ctx); void (*enable_stream)(struct pipe_ctx *pipe_ctx); @@ -198,6 +196,7 @@ struct hw_sequencer_funcs { void (*edp_backlight_control)( struct dc_link *link, bool enable); + void (*edp_wait_for_hpd_ready)(struct dc_link *link, bool power_up); }; @@ -209,4 +208,8 @@ void color_space_to_black_color( bool hwss_wait_for_blank_complete( struct timing_generator *tg); +const uint16_t *find_color_matrix( + enum dc_color_space color_space, + uint32_t *array_size); + #endif /* __DC_HW_SEQUENCER_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/irq/Makefile b/drivers/gpu/drm/amd/display/dc/irq/Makefile index c7e93f7223bdabe032436004d6d2dd0304ebed10..498515aad4a50bf35133755d1bded3a387a0817e 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/Makefile +++ b/drivers/gpu/drm/amd/display/dc/irq/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'audio' sub-component of DAL. # It provides the control and status of HW adapter resources, # that are global for the ASIC and sharable between pipes. diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index a87c0329541f584c2c7dcff884b67ad96f4a684f..1fcbc99e63b58f1839be535c95665c682165a942 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -26,8 +26,6 @@ #ifndef _OS_TYPES_H_ #define _OS_TYPES_H_ -#if defined __KERNEL__ - #include #include #include @@ -46,14 +44,12 @@ #undef WRITE #undef FRAME_SIZE -#define dm_output_to_console(fmt, ...) DRM_INFO(fmt, ##__VA_ARGS__) +#define dm_output_to_console(fmt, ...) DRM_DEBUG_KMS(fmt, ##__VA_ARGS__) #define dm_error(fmt, ...) DRM_ERROR(fmt, ##__VA_ARGS__) -#define dm_debug(fmt, ...) DRM_DEBUG_KMS(fmt, ##__VA_ARGS__) - -#define dm_vlog(fmt, args) vprintk(fmt, args) - +#if defined(CONFIG_DRM_AMD_DC_DCN1_0) +#include #endif /* @@ -89,8 +85,4 @@ BREAK_TO_DEBUGGER(); \ } while (0) -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) -#include -#endif - #endif /* _OS_TYPES_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/virtual/Makefile b/drivers/gpu/drm/amd/display/dc/virtual/Makefile index fc0b7318d9ccb239bc7127a1b735ff1f7324e3aa..07326d244d50a7333b43e0d3e02bc0dedca35f97 100644 --- a/drivers/gpu/drm/amd/display/dc/virtual/Makefile +++ b/drivers/gpu/drm/amd/display/dc/virtual/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the virtual sub-component of DAL. # It provides the control and status of HW CRTC block. diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index 3248f699daf2c986b6fb1508f9c9059604630370..4badaedbaaddea8c94278c9d974cd1a57640b21b 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -463,4 +463,11 @@ uint32_t dal_fixed31_32_u2d19( uint32_t dal_fixed31_32_u0d19( struct fixed31_32 arg); + +uint32_t dal_fixed31_32_clamp_u0d14( + struct fixed31_32 arg); + +uint32_t dal_fixed31_32_clamp_u0d10( + struct fixed31_32 arg); + #endif diff --git a/drivers/gpu/drm/amd/display/include/grph_object_id.h b/drivers/gpu/drm/amd/display/include/grph_object_id.h index 03a7a9ca95eac6de24698679790fefba984a7c2a..c4197432eb7c34542952364fc4027ff4819f63f3 100644 --- a/drivers/gpu/drm/amd/display/include/grph_object_id.h +++ b/drivers/gpu/drm/amd/display/include/grph_object_id.h @@ -233,10 +233,6 @@ static inline struct graphics_object_id dal_graphics_object_id_init( return result; } -bool dal_graphics_object_id_is_equal( - struct graphics_object_id id1, - struct graphics_object_id id2); - /* Based on internal data members memory layout */ static inline uint32_t dal_graphics_object_id_to_uint( struct graphics_object_id id) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/Makefile b/drivers/gpu/drm/amd/display/modules/freesync/Makefile index db8e0ff6d7a9c07da7fef4ec5808f73d5bf3a1b9..fb9a499780e8c82c6a833ee424dfe1b36a302fec 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/Makefile +++ b/drivers/gpu/drm/amd/display/modules/freesync/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for the 'freesync' sub-module of DAL. # diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_default.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_default.h index 663d3af35baf120e1636eb293b2d18b81d719762..5bf84c6d0ec362b79b22ae75589c20e679692c3d 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_default.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_default.h @@ -436,7 +436,6 @@ #define mmTA_CNTL_DEFAULT 0x8004d850 #define mmTA_CNTL_AUX_DEFAULT 0x00000000 #define mmTA_RESERVED_010C_DEFAULT 0x00000000 -#define mmTA_GRAD_ADJ_DEFAULT 0x40000040 #define mmTA_STATUS_DEFAULT 0x00000000 #define mmTA_SCRATCH_DEFAULT 0x00000000 @@ -1700,7 +1699,6 @@ #define mmDB_STENCIL_WRITE_BASE_DEFAULT 0x00000000 #define mmDB_STENCIL_WRITE_BASE_HI_DEFAULT 0x00000000 #define mmDB_DFSM_CONTROL_DEFAULT 0x00000000 -#define mmDB_RENDER_FILTER_DEFAULT 0x00000000 #define mmDB_Z_INFO2_DEFAULT 0x00000000 #define mmDB_STENCIL_INFO2_DEFAULT 0x00000000 #define mmTA_BC_BASE_ADDR_DEFAULT 0x00000000 @@ -1806,8 +1804,6 @@ #define mmPA_SC_RIGHT_VERT_GRID_DEFAULT 0x00000000 #define mmPA_SC_LEFT_VERT_GRID_DEFAULT 0x00000000 #define mmPA_SC_HORIZ_GRID_DEFAULT 0x00000000 -#define mmPA_SC_FOV_WINDOW_LR_DEFAULT 0x00000000 -#define mmPA_SC_FOV_WINDOW_TB_DEFAULT 0x00000000 #define mmVGT_MULTI_PRIM_IB_RESET_INDX_DEFAULT 0x00000000 #define mmCB_BLEND_RED_DEFAULT 0x00000000 #define mmCB_BLEND_GREEN_DEFAULT 0x00000000 @@ -2072,7 +2068,6 @@ #define mmVGT_EVENT_INITIATOR_DEFAULT 0x00000000 #define mmVGT_GS_MAX_PRIMS_PER_SUBGROUP_DEFAULT 0x00000000 #define mmVGT_DRAW_PAYLOAD_CNTL_DEFAULT 0x00000000 -#define mmVGT_INDEX_PAYLOAD_CNTL_DEFAULT 0x00000000 #define mmVGT_INSTANCE_STEP_RATE_0_DEFAULT 0x00000000 #define mmVGT_INSTANCE_STEP_RATE_1_DEFAULT 0x00000000 #define mmVGT_ESGS_RING_ITEMSIZE_DEFAULT 0x00000000 @@ -2490,7 +2485,6 @@ #define mmWD_INDEX_BUF_BASE_DEFAULT 0x00000000 #define mmWD_INDEX_BUF_BASE_HI_DEFAULT 0x00000000 #define mmIA_MULTI_VGT_PARAM_DEFAULT 0x006000ff -#define mmVGT_OBJECT_ID_DEFAULT 0x00000000 #define mmVGT_INSTANCE_BASE_ID_DEFAULT 0x00000000 #define mmPA_SU_LINE_STIPPLE_VALUE_DEFAULT 0x00000000 #define mmPA_SC_LINE_STIPPLE_STATE_DEFAULT 0x00000000 @@ -2534,7 +2528,6 @@ #define mmSQC_WRITEBACK_DEFAULT 0x00000000 #define mmTA_CS_BC_BASE_ADDR_DEFAULT 0x00000000 #define mmTA_CS_BC_BASE_ADDR_HI_DEFAULT 0x00000000 -#define mmTA_GRAD_ADJ_UCONFIG_DEFAULT 0x40000040 #define mmDB_OCCLUSION_COUNT0_LOW_DEFAULT 0x00000000 #define mmDB_OCCLUSION_COUNT0_HI_DEFAULT 0x00000000 #define mmDB_OCCLUSION_COUNT1_LOW_DEFAULT 0x00000000 diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h index e6d6171aa8b9c81fbcacd39d3a42139cb068b884..4ce090db7ef776ac1042f4a0f951d2e654402a9c 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h @@ -841,8 +841,6 @@ #define mmTA_CNTL_AUX_BASE_IDX 0 #define mmTA_RESERVED_010C 0x0543 #define mmTA_RESERVED_010C_BASE_IDX 0 -#define mmTA_GRAD_ADJ 0x0544 -#define mmTA_GRAD_ADJ_BASE_IDX 0 #define mmTA_STATUS 0x0548 #define mmTA_STATUS_BASE_IDX 0 #define mmTA_SCRATCH 0x0564 @@ -3330,8 +3328,6 @@ #define mmDB_STENCIL_WRITE_BASE_HI_BASE_IDX 1 #define mmDB_DFSM_CONTROL 0x0018 #define mmDB_DFSM_CONTROL_BASE_IDX 1 -#define mmDB_RENDER_FILTER 0x0019 -#define mmDB_RENDER_FILTER_BASE_IDX 1 #define mmDB_Z_INFO2 0x001a #define mmDB_Z_INFO2_BASE_IDX 1 #define mmDB_STENCIL_INFO2 0x001b @@ -3542,10 +3538,6 @@ #define mmPA_SC_LEFT_VERT_GRID_BASE_IDX 1 #define mmPA_SC_HORIZ_GRID 0x00ea #define mmPA_SC_HORIZ_GRID_BASE_IDX 1 -#define mmPA_SC_FOV_WINDOW_LR 0x00eb -#define mmPA_SC_FOV_WINDOW_LR_BASE_IDX 1 -#define mmPA_SC_FOV_WINDOW_TB 0x00ec -#define mmPA_SC_FOV_WINDOW_TB_BASE_IDX 1 #define mmVGT_MULTI_PRIM_IB_RESET_INDX 0x0103 #define mmVGT_MULTI_PRIM_IB_RESET_INDX_BASE_IDX 1 #define mmCB_BLEND_RED 0x0105 @@ -4074,8 +4066,6 @@ #define mmVGT_GS_MAX_PRIMS_PER_SUBGROUP_BASE_IDX 1 #define mmVGT_DRAW_PAYLOAD_CNTL 0x02a6 #define mmVGT_DRAW_PAYLOAD_CNTL_BASE_IDX 1 -#define mmVGT_INDEX_PAYLOAD_CNTL 0x02a7 -#define mmVGT_INDEX_PAYLOAD_CNTL_BASE_IDX 1 #define mmVGT_INSTANCE_STEP_RATE_0 0x02a8 #define mmVGT_INSTANCE_STEP_RATE_0_BASE_IDX 1 #define mmVGT_INSTANCE_STEP_RATE_1 0x02a9 @@ -4908,8 +4898,6 @@ #define mmWD_INDEX_BUF_BASE_HI_BASE_IDX 1 #define mmIA_MULTI_VGT_PARAM 0x2258 #define mmIA_MULTI_VGT_PARAM_BASE_IDX 1 -#define mmVGT_OBJECT_ID 0x2259 -#define mmVGT_OBJECT_ID_BASE_IDX 1 #define mmVGT_INSTANCE_BASE_ID 0x225a #define mmVGT_INSTANCE_BASE_ID_BASE_IDX 1 #define mmPA_SU_LINE_STIPPLE_VALUE 0x2280 @@ -4996,8 +4984,6 @@ #define mmTA_CS_BC_BASE_ADDR_BASE_IDX 1 #define mmTA_CS_BC_BASE_ADDR_HI 0x2381 #define mmTA_CS_BC_BASE_ADDR_HI_BASE_IDX 1 -#define mmTA_GRAD_ADJ_UCONFIG 0x2382 -#define mmTA_GRAD_ADJ_UCONFIG_BASE_IDX 1 #define mmDB_OCCLUSION_COUNT0_LOW 0x23c0 #define mmDB_OCCLUSION_COUNT0_LOW_BASE_IDX 1 #define mmDB_OCCLUSION_COUNT0_HI 0x23c1 diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h index 5c5e9b445432d81334a30f617dfbb17b6d671065..2e1214be67a22490284052642d06d962f9ad5438 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_sh_mask.h @@ -4576,15 +4576,6 @@ //TA_RESERVED_010C #define TA_RESERVED_010C__Unused__SHIFT 0x0 #define TA_RESERVED_010C__Unused_MASK 0xFFFFFFFFL -//TA_GRAD_ADJ -#define TA_GRAD_ADJ__GRAD_ADJ_0__SHIFT 0x0 -#define TA_GRAD_ADJ__GRAD_ADJ_1__SHIFT 0x8 -#define TA_GRAD_ADJ__GRAD_ADJ_2__SHIFT 0x10 -#define TA_GRAD_ADJ__GRAD_ADJ_3__SHIFT 0x18 -#define TA_GRAD_ADJ__GRAD_ADJ_0_MASK 0x000000FFL -#define TA_GRAD_ADJ__GRAD_ADJ_1_MASK 0x0000FF00L -#define TA_GRAD_ADJ__GRAD_ADJ_2_MASK 0x00FF0000L -#define TA_GRAD_ADJ__GRAD_ADJ_3_MASK 0xFF000000L //TA_STATUS #define TA_STATUS__FG_PFIFO_EMPTYB__SHIFT 0xc #define TA_STATUS__FG_LFIFO_EMPTYB__SHIFT 0xd @@ -14459,9 +14450,6 @@ #define DB_DFSM_CONTROL__PUNCHOUT_MODE_MASK 0x00000003L #define DB_DFSM_CONTROL__POPS_DRAIN_PS_ON_OVERLAP_MASK 0x00000004L #define DB_DFSM_CONTROL__DISALLOW_OVERFLOW_MASK 0x00000008L -//DB_RENDER_FILTER -#define DB_RENDER_FILTER__PS_INVOKE_MASK__SHIFT 0x0 -#define DB_RENDER_FILTER__PS_INVOKE_MASK_MASK 0x0000FFFFL //DB_Z_INFO2 #define DB_Z_INFO2__EPITCH__SHIFT 0x0 #define DB_Z_INFO2__EPITCH_MASK 0x0000FFFFL @@ -14959,11 +14947,9 @@ #define PA_SC_TILE_STEERING_OVERRIDE__ENABLE__SHIFT 0x0 #define PA_SC_TILE_STEERING_OVERRIDE__NUM_SE__SHIFT 0x1 #define PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SE__SHIFT 0x5 -#define PA_SC_TILE_STEERING_OVERRIDE__DISABLE_SRBSL_DB_OPTIMIZED_PACKING__SHIFT 0x8 #define PA_SC_TILE_STEERING_OVERRIDE__ENABLE_MASK 0x00000001L #define PA_SC_TILE_STEERING_OVERRIDE__NUM_SE_MASK 0x00000006L #define PA_SC_TILE_STEERING_OVERRIDE__NUM_RB_PER_SE_MASK 0x00000060L -#define PA_SC_TILE_STEERING_OVERRIDE__DISABLE_SRBSL_DB_OPTIMIZED_PACKING_MASK 0x00000100L //CP_PERFMON_CNTX_CNTL #define CP_PERFMON_CNTX_CNTL__PERFMON_ENABLE__SHIFT 0x1f #define CP_PERFMON_CNTX_CNTL__PERFMON_ENABLE_MASK 0x80000000L @@ -15003,20 +14989,6 @@ #define PA_SC_HORIZ_GRID__TOP_HALF_MASK 0x0000FF00L #define PA_SC_HORIZ_GRID__BOT_HALF_MASK 0x00FF0000L #define PA_SC_HORIZ_GRID__BOT_QTR_MASK 0xFF000000L -//PA_SC_FOV_WINDOW_LR -#define PA_SC_FOV_WINDOW_LR__LEFT_EYE_FOV_LEFT__SHIFT 0x0 -#define PA_SC_FOV_WINDOW_LR__LEFT_EYE_FOV_RIGHT__SHIFT 0x8 -#define PA_SC_FOV_WINDOW_LR__RIGHT_EYE_FOV_LEFT__SHIFT 0x10 -#define PA_SC_FOV_WINDOW_LR__RIGHT_EYE_FOV_RIGHT__SHIFT 0x18 -#define PA_SC_FOV_WINDOW_LR__LEFT_EYE_FOV_LEFT_MASK 0x000000FFL -#define PA_SC_FOV_WINDOW_LR__LEFT_EYE_FOV_RIGHT_MASK 0x0000FF00L -#define PA_SC_FOV_WINDOW_LR__RIGHT_EYE_FOV_LEFT_MASK 0x00FF0000L -#define PA_SC_FOV_WINDOW_LR__RIGHT_EYE_FOV_RIGHT_MASK 0xFF000000L -//PA_SC_FOV_WINDOW_TB -#define PA_SC_FOV_WINDOW_TB__FOV_TOP__SHIFT 0x0 -#define PA_SC_FOV_WINDOW_TB__FOV_BOT__SHIFT 0x8 -#define PA_SC_FOV_WINDOW_TB__FOV_TOP_MASK 0x000000FFL -#define PA_SC_FOV_WINDOW_TB__FOV_BOT_MASK 0x0000FF00L //VGT_MULTI_PRIM_IB_RESET_INDX #define VGT_MULTI_PRIM_IB_RESET_INDX__RESET_INDX__SHIFT 0x0 #define VGT_MULTI_PRIM_IB_RESET_INDX__RESET_INDX_MASK 0xFFFFFFFFL @@ -17010,13 +16982,11 @@ #define PA_SU_SMALL_PRIM_FILTER_CNTL__LINE_FILTER_DISABLE__SHIFT 0x2 #define PA_SU_SMALL_PRIM_FILTER_CNTL__POINT_FILTER_DISABLE__SHIFT 0x3 #define PA_SU_SMALL_PRIM_FILTER_CNTL__RECTANGLE_FILTER_DISABLE__SHIFT 0x4 -#define PA_SU_SMALL_PRIM_FILTER_CNTL__SRBSL_ENABLE__SHIFT 0x5 #define PA_SU_SMALL_PRIM_FILTER_CNTL__SMALL_PRIM_FILTER_ENABLE_MASK 0x00000001L #define PA_SU_SMALL_PRIM_FILTER_CNTL__TRIANGLE_FILTER_DISABLE_MASK 0x00000002L #define PA_SU_SMALL_PRIM_FILTER_CNTL__LINE_FILTER_DISABLE_MASK 0x00000004L #define PA_SU_SMALL_PRIM_FILTER_CNTL__POINT_FILTER_DISABLE_MASK 0x00000008L #define PA_SU_SMALL_PRIM_FILTER_CNTL__RECTANGLE_FILTER_DISABLE_MASK 0x00000010L -#define PA_SU_SMALL_PRIM_FILTER_CNTL__SRBSL_ENABLE_MASK 0x00000020L //PA_CL_OBJPRIM_ID_CNTL #define PA_CL_OBJPRIM_ID_CNTL__OBJ_ID_SEL__SHIFT 0x0 #define PA_CL_OBJPRIM_ID_CNTL__ADD_PIPED_PRIM_ID__SHIFT 0x1 @@ -17345,9 +17315,6 @@ #define VGT_DRAW_PAYLOAD_CNTL__EN_REG_RT_INDEX_MASK 0x00000002L #define VGT_DRAW_PAYLOAD_CNTL__EN_PIPELINE_PRIMID_MASK 0x00000004L #define VGT_DRAW_PAYLOAD_CNTL__OBJECT_ID_INST_EN_MASK 0x00000008L -//VGT_INDEX_PAYLOAD_CNTL -#define VGT_INDEX_PAYLOAD_CNTL__COMPOUND_INDEX_EN__SHIFT 0x0 -#define VGT_INDEX_PAYLOAD_CNTL__COMPOUND_INDEX_EN_MASK 0x00000001L //VGT_INSTANCE_STEP_RATE_0 #define VGT_INSTANCE_STEP_RATE_0__STEP_RATE__SHIFT 0x0 #define VGT_INSTANCE_STEP_RATE_0__STEP_RATE_MASK 0xFFFFFFFFL @@ -19849,9 +19816,6 @@ #define IA_MULTI_VGT_PARAM__EN_INST_OPT_BASIC_MASK 0x00200000L #define IA_MULTI_VGT_PARAM__EN_INST_OPT_ADV_MASK 0x00400000L #define IA_MULTI_VGT_PARAM__HW_USE_ONLY_MASK 0x00800000L -//VGT_OBJECT_ID -#define VGT_OBJECT_ID__REG_OBJ_ID__SHIFT 0x0 -#define VGT_OBJECT_ID__REG_OBJ_ID_MASK 0xFFFFFFFFL //VGT_INSTANCE_BASE_ID #define VGT_INSTANCE_BASE_ID__INSTANCE_BASE_ID__SHIFT 0x0 #define VGT_INSTANCE_BASE_ID__INSTANCE_BASE_ID_MASK 0xFFFFFFFFL @@ -20067,15 +20031,6 @@ //TA_CS_BC_BASE_ADDR_HI #define TA_CS_BC_BASE_ADDR_HI__ADDRESS__SHIFT 0x0 #define TA_CS_BC_BASE_ADDR_HI__ADDRESS_MASK 0x000000FFL -//TA_GRAD_ADJ_UCONFIG -#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_0__SHIFT 0x0 -#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_1__SHIFT 0x8 -#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_2__SHIFT 0x10 -#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_3__SHIFT 0x18 -#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_0_MASK 0x000000FFL -#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_1_MASK 0x0000FF00L -#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_2_MASK 0x00FF0000L -#define TA_GRAD_ADJ_UCONFIG__GRAD_ADJ_3_MASK 0xFF000000L //DB_OCCLUSION_COUNT0_LOW #define DB_OCCLUSION_COUNT0_LOW__COUNT_LOW__SHIFT 0x0 #define DB_OCCLUSION_COUNT0_LOW__COUNT_LOW_MASK 0xFFFFFFFFL diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_1_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_1_offset.h index db7ef5ede0e52e67607fb3eacef89e2d001b3def..030e0020902b277ff6b2e485d54ac48bb94fc87f 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_1_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_1_offset.h @@ -815,8 +815,6 @@ #define mmTA_CNTL_AUX_BASE_IDX 0 #define mmTA_RESERVED_010C 0x0543 #define mmTA_RESERVED_010C_BASE_IDX 0 -#define mmTA_GRAD_ADJ 0x0544 -#define mmTA_GRAD_ADJ_BASE_IDX 0 #define mmTA_STATUS 0x0548 #define mmTA_STATUS_BASE_IDX 0 #define mmTA_SCRATCH 0x0564 @@ -3617,8 +3615,6 @@ #define mmDB_STENCIL_WRITE_BASE_HI_BASE_IDX 1 #define mmDB_DFSM_CONTROL 0x0018 #define mmDB_DFSM_CONTROL_BASE_IDX 1 -#define mmDB_RENDER_FILTER 0x0019 -#define mmDB_RENDER_FILTER_BASE_IDX 1 #define mmDB_Z_INFO2 0x001a #define mmDB_Z_INFO2_BASE_IDX 1 #define mmDB_STENCIL_INFO2 0x001b @@ -3829,10 +3825,6 @@ #define mmPA_SC_LEFT_VERT_GRID_BASE_IDX 1 #define mmPA_SC_HORIZ_GRID 0x00ea #define mmPA_SC_HORIZ_GRID_BASE_IDX 1 -#define mmPA_SC_FOV_WINDOW_LR 0x00eb -#define mmPA_SC_FOV_WINDOW_LR_BASE_IDX 1 -#define mmPA_SC_FOV_WINDOW_TB 0x00ec -#define mmPA_SC_FOV_WINDOW_TB_BASE_IDX 1 #define mmVGT_MULTI_PRIM_IB_RESET_INDX 0x0103 #define mmVGT_MULTI_PRIM_IB_RESET_INDX_BASE_IDX 1 #define mmCB_BLEND_RED 0x0105 @@ -4361,8 +4353,6 @@ #define mmVGT_GS_MAX_PRIMS_PER_SUBGROUP_BASE_IDX 1 #define mmVGT_DRAW_PAYLOAD_CNTL 0x02a6 #define mmVGT_DRAW_PAYLOAD_CNTL_BASE_IDX 1 -#define mmVGT_INDEX_PAYLOAD_CNTL 0x02a7 -#define mmVGT_INDEX_PAYLOAD_CNTL_BASE_IDX 1 #define mmVGT_INSTANCE_STEP_RATE_0 0x02a8 #define mmVGT_INSTANCE_STEP_RATE_0_BASE_IDX 1 #define mmVGT_INSTANCE_STEP_RATE_1 0x02a9 @@ -5195,8 +5185,6 @@ #define mmWD_INDEX_BUF_BASE_HI_BASE_IDX 1 #define mmIA_MULTI_VGT_PARAM 0x2258 #define mmIA_MULTI_VGT_PARAM_BASE_IDX 1 -#define mmVGT_OBJECT_ID 0x2259 -#define mmVGT_OBJECT_ID_BASE_IDX 1 #define mmVGT_INSTANCE_BASE_ID 0x225a #define mmVGT_INSTANCE_BASE_ID_BASE_IDX 1 #define mmPA_SU_LINE_STIPPLE_VALUE 0x2280 @@ -5283,8 +5271,6 @@ #define mmTA_CS_BC_BASE_ADDR_BASE_IDX 1 #define mmTA_CS_BC_BASE_ADDR_HI 0x2381 #define mmTA_CS_BC_BASE_ADDR_HI_BASE_IDX 1 -#define mmTA_GRAD_ADJ_UCONFIG 0x2382 -#define mmTA_GRAD_ADJ_UCONFIG_BASE_IDX 1 #define mmDB_OCCLUSION_COUNT0_LOW 0x23c0 #define mmDB_OCCLUSION_COUNT0_LOW_BASE_IDX 1 #define mmDB_OCCLUSION_COUNT0_HI 0x23c1 diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index f516fd10e6ba7469856a59d039a8e1d715251a0e..a6752bd0c87131e57801786bac66ce9bbe9c6d1e 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -46,6 +46,28 @@ enum kfd_preempt_type { KFD_PREEMPT_TYPE_WAVEFRONT_RESET, }; +struct kfd_cu_info { + uint32_t num_shader_engines; + uint32_t num_shader_arrays_per_engine; + uint32_t num_cu_per_sh; + uint32_t cu_active_number; + uint32_t cu_ao_mask; + uint32_t simd_per_cu; + uint32_t max_waves_per_simd; + uint32_t wave_front_size; + uint32_t max_scratch_slots_per_cu; + uint32_t lds_size; + uint32_t cu_bitmap[4][4]; +}; + +/* For getting GPU local memory information from KGD */ +struct kfd_local_mem_info { + uint64_t local_mem_size_private; + uint64_t local_mem_size_public; + uint32_t vram_width; + uint32_t mem_clk_max; +}; + enum kgd_memory_pool { KGD_POOL_SYSTEM_CACHEABLE = 1, KGD_POOL_SYSTEM_WRITECOMBINE = 2, @@ -106,7 +128,7 @@ struct tile_config { * * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture * - * @get_vmem_size: Retrieves (physical) size of VRAM + * @get_local_mem_info: Retrieves information about GPU local memory * * @get_gpu_clock_counter: Retrieves GPU clock counter * @@ -131,6 +153,12 @@ struct tile_config { * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot. * used only for no HWS mode. * + * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs. + * Array is allocated with kmalloc, needs to be freed with kfree by caller. + * + * @hqd_sdma_dump: Dumps SDMA HQD registers to an array of address-value pairs. + * Array is allocated with kmalloc, needs to be freed with kfree by caller. + * * @hqd_is_occupies: Checks if a hqd slot is occupied. * * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot. @@ -147,6 +175,10 @@ struct tile_config { * * @get_tile_config: Returns GPU-specific tiling mode information * + * @get_cu_info: Retrieves activated cu info + * + * @get_vram_usage: Returns current VRAM usage + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -158,7 +190,8 @@ struct kfd2kgd_calls { void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj); - uint64_t (*get_vmem_size)(struct kgd_dev *kgd); + void (*get_local_mem_info)(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info); uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd); uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd); @@ -184,7 +217,16 @@ struct kfd2kgd_calls { uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm); - int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd); + int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); + + int (*hqd_dump)(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); + + int (*hqd_sdma_dump)(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); @@ -224,6 +266,10 @@ struct kfd2kgd_calls { void (*set_scratch_backing_va)(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config); + + void (*get_cu_info)(struct kgd_dev *kgd, + struct kfd_cu_info *cu_info); + uint64_t (*get_vram_usage)(struct kgd_dev *kgd); }; /** diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h index 20234820194bde0c1d9ccd8475bd8c694fcf8211..717fbae1d362011bfee7185fffefc01cba3fcf71 100644 --- a/drivers/gpu/drm/amd/include/vi_structs.h +++ b/drivers/gpu/drm/amd/include/vi_structs.h @@ -153,6 +153,8 @@ struct vi_sdma_mqd { uint32_t reserved_125; uint32_t reserved_126; uint32_t reserved_127; + uint32_t sdma_engine_id; + uint32_t sdma_queue_id; }; struct vi_mqd { diff --git a/drivers/gpu/drm/amd/lib/Makefile b/drivers/gpu/drm/amd/lib/Makefile index 87cd7009e80f1b59744d4a928c9b66d74c16f5e1..690243001e1aa935be3c29ccf874e0ec82291dad 100644 --- a/drivers/gpu/drm/amd/lib/Makefile +++ b/drivers/gpu/drm/amd/lib/Makefile @@ -1,4 +1,25 @@ # +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# # Makefile for AMD library routines, which are used by AMD driver # components. # diff --git a/drivers/gpu/drm/amd/powerplay/Makefile b/drivers/gpu/drm/amd/powerplay/Makefile index 8c55c6e254d99eb008231587bb21beb29c09a450..231785a9e24c67bb31695367f7e1294d15f47bb3 100644 --- a/drivers/gpu/drm/amd/powerplay/Makefile +++ b/drivers/gpu/drm/amd/powerplay/Makefile @@ -1,4 +1,24 @@ -# SPDX-License-Identifier: GPL-2.0 +# +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# subdir-ccflags-y += \ -I$(FULL_AMD_PATH)/powerplay/inc/ \ diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 9d3bdada79d5e74843b58cdea7b7936bd644393d..4c3223a4d62b04d0264686e2eb6e493e6daf7c23 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -389,20 +389,12 @@ static int pp_dpm_force_performance_level(void *handle, if (level == hwmgr->dpm_level) return 0; - if (hwmgr->hwmgr_func->force_dpm_level == NULL) { - pr_info("%s was not implemented.\n", __func__); - return 0; - } - mutex_lock(&pp_handle->pp_lock); pp_dpm_en_umd_pstate(hwmgr, &level); hwmgr->request_dpm_level = level; hwmgr_handle_task(pp_handle, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL); - ret = hwmgr->hwmgr_func->force_dpm_level(hwmgr, level); - if (!ret) - hwmgr->dpm_level = hwmgr->request_dpm_level; - mutex_unlock(&pp_handle->pp_lock); + return 0; } @@ -726,6 +718,8 @@ static int pp_dpm_get_pp_num_states(void *handle, struct pp_instance *pp_handle = (struct pp_instance *)handle; int ret = 0; + memset(data, 0, sizeof(*data)); + ret = pp_check(pp_handle); if (ret) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile index 824fb6fe54ae97dc2a1768f49141ee6ff0ce3a49..a212c27f2e17c99915e4bede575f6ed40cb263f0 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile @@ -1,4 +1,24 @@ -# SPDX-License-Identifier: GPL-2.0 +# +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# # # Makefile for the 'hw manager' sub-component of powerplay. # It provides the hardware management services for the driver. diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c index ad1f6b57884b716620b602e51c8a53e566d61209..b314d09d41af41a14664f79741e066b4013d2670 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c @@ -728,9 +728,6 @@ static int cz_update_sclk_limit(struct pp_hwmgr *hwmgr) if (clock < stable_pstate_sclk) clock = stable_pstate_sclk; - } else { - if (clock < hwmgr->gfx_arbiter.sclk) - clock = hwmgr->gfx_arbiter.sclk; } if (cz_hwmgr->sclk_dpm.soft_min_clk != clock) { @@ -1085,14 +1082,8 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, uint32_t num_of_active_displays = 0; struct cgs_display_info info = {0}; - cz_ps->evclk = hwmgr->vce_arbiter.evclk; - cz_ps->ecclk = hwmgr->vce_arbiter.ecclk; - cz_ps->need_dfs_bypass = true; - cz_hwmgr->video_start = (hwmgr->uvd_arbiter.vclk != 0 || hwmgr->uvd_arbiter.dclk != 0 || - hwmgr->vce_arbiter.evclk != 0 || hwmgr->vce_arbiter.ecclk != 0); - cz_hwmgr->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label); clocks.memoryClock = hwmgr->display_config.min_mem_set_clock != 0 ? @@ -1105,9 +1096,6 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk; - if (clocks.memoryClock < hwmgr->gfx_arbiter.mclk) - clocks.memoryClock = hwmgr->gfx_arbiter.mclk; - force_high = (clocks.memoryClock > cz_hwmgr->sys_info.nbp_memory_clock[CZ_NUM_NBPMEMORYCLOCK - 1]) || (num_of_active_displays >= 3); @@ -1339,22 +1327,13 @@ int cz_dpm_update_vce_dpm(struct pp_hwmgr *hwmgr) cz_hwmgr->vce_dpm.hard_min_clk, PPSMC_MSG_SetEclkHardMin)); } else { - /*Program HardMin based on the vce_arbiter.ecclk */ - if (hwmgr->vce_arbiter.ecclk == 0) { - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetEclkHardMin, 0); + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetEclkHardMin, 0); /* disable ECLK DPM 0. Otherwise VCE could hang if * switching SCLK from DPM 0 to 6/7 */ - smum_send_msg_to_smc_with_parameter(hwmgr, + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetEclkSoftMin, 1); - } else { - cz_hwmgr->vce_dpm.hard_min_clk = hwmgr->vce_arbiter.ecclk; - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetEclkHardMin, - cz_get_eclk_level(hwmgr, - cz_hwmgr->vce_dpm.hard_min_clk, - PPSMC_MSG_SetEclkHardMin)); - } } return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index 623cff90233d450f60a7021b260edc7a7d63a911..2b0c53fe4c8dfb47a5b6353b0c0fd5fbf9dba753 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -112,26 +112,29 @@ int phm_force_dpm_levels(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level PHM_FUNC_CHECK(hwmgr); - if (hwmgr->hwmgr_func->force_dpm_level != NULL) { + if (hwmgr->hwmgr_func->force_dpm_level != NULL) ret = hwmgr->hwmgr_func->force_dpm_level(hwmgr, level); - if (ret) - return ret; - - if (hwmgr->hwmgr_func->set_power_profile_state) { - if (hwmgr->current_power_profile == AMD_PP_GFX_PROFILE) - ret = hwmgr->hwmgr_func->set_power_profile_state( - hwmgr, - &hwmgr->gfx_power_profile); - else if (hwmgr->current_power_profile == AMD_PP_COMPUTE_PROFILE) - ret = hwmgr->hwmgr_func->set_power_profile_state( - hwmgr, - &hwmgr->compute_power_profile); - } - } return ret; } +int phm_reset_power_profile_state(struct pp_hwmgr *hwmgr) +{ + int ret = 0; + + if (hwmgr->hwmgr_func->set_power_profile_state) { + if (hwmgr->current_power_profile == AMD_PP_GFX_PROFILE) + ret = hwmgr->hwmgr_func->set_power_profile_state( + hwmgr, + &hwmgr->gfx_power_profile); + else if (hwmgr->current_power_profile == AMD_PP_COMPUTE_PROFILE) + ret = hwmgr->hwmgr_func->set_power_profile_state( + hwmgr, + &hwmgr->compute_power_profile); + } + return ret; +} + int phm_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, struct pp_power_state *adjusted_ps, const struct pp_power_state *current_ps) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index ce59e0e67cb22d632b27c3dce86708bd19a7fa4a..0229f774f7a93262196d4136a3ffa415b6940904 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -149,6 +149,7 @@ int hwmgr_early_init(struct pp_instance *handle) hwmgr->power_source = PP_PowerSource_AC; hwmgr->pp_table_version = PP_TABLE_V1; hwmgr->dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; + hwmgr->request_dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; hwmgr_init_default_caps(hwmgr); hwmgr_set_user_specify_caps(hwmgr); hwmgr->fan_ctrl_is_in_default_mode = true; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c index 67fae834bc6788680eddd50b8f55a737f4b013ab..8de384bf9a8fb62a12bb90086a17bd4fd04c9de5 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_overdriver.c @@ -1,4 +1,26 @@ -// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + #include "pp_overdriver.h" #include diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c index ffa44bbb218e4e59004a97e6b148d66f48ae1af4..95ab772e0c3e3c3b1eacd4b95b1844ccbdd1cbbd 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c @@ -244,6 +244,10 @@ int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, bool skip, } phm_notify_smc_display_config_after_ps_adjustment(hwmgr); + if (!phm_force_dpm_levels(hwmgr, hwmgr->request_dpm_level)) + hwmgr->dpm_level = hwmgr->request_dpm_level; + + phm_reset_power_profile_state(hwmgr); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c index 3e0b267c74a88991fde93055f7e3460d2a985379..569073e3a5a1372485caf56976d83479e4330fa4 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c @@ -159,7 +159,6 @@ static int rv_construct_boot_state(struct pp_hwmgr *hwmgr) static int rv_set_clock_limit(struct pp_hwmgr *hwmgr, const void *input) { - struct rv_hwmgr *rv_data = (struct rv_hwmgr *)(hwmgr->backend); struct PP_Clocks clocks = {0}; struct pp_display_clock_request clock_req; @@ -170,39 +169,6 @@ static int rv_set_clock_limit(struct pp_hwmgr *hwmgr, const void *input) PP_ASSERT_WITH_CODE(!rv_display_clock_voltage_request(hwmgr, &clock_req), "Attempt to set DCF Clock Failed!", return -EINVAL); - if (((hwmgr->uvd_arbiter.vclk_soft_min / 100) != rv_data->vclk_soft_min) || - ((hwmgr->uvd_arbiter.dclk_soft_min / 100) != rv_data->dclk_soft_min)) { - rv_data->vclk_soft_min = hwmgr->uvd_arbiter.vclk_soft_min / 100; - rv_data->dclk_soft_min = hwmgr->uvd_arbiter.dclk_soft_min / 100; - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetSoftMinVcn, - (rv_data->vclk_soft_min << 16) | rv_data->vclk_soft_min); - } - - if((hwmgr->gfx_arbiter.sclk_hard_min != 0) && - ((hwmgr->gfx_arbiter.sclk_hard_min / 100) != rv_data->soc_actual_hard_min_freq)) { - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetHardMinSocclkByFreq, - hwmgr->gfx_arbiter.sclk_hard_min / 100); - rv_read_arg_from_smc(hwmgr, &rv_data->soc_actual_hard_min_freq); - } - - if ((hwmgr->gfx_arbiter.gfxclk != 0) && - (rv_data->gfx_actual_soft_min_freq != (hwmgr->gfx_arbiter.gfxclk))) { - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetMinVideoGfxclkFreq, - hwmgr->gfx_arbiter.gfxclk / 100); - rv_read_arg_from_smc(hwmgr, &rv_data->gfx_actual_soft_min_freq); - } - - if ((hwmgr->gfx_arbiter.fclk != 0) && - (rv_data->fabric_actual_soft_min_freq != (hwmgr->gfx_arbiter.fclk / 100))) { - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SetMinVideoFclkFreq, - hwmgr->gfx_arbiter.fclk / 100); - rv_read_arg_from_smc(hwmgr, &rv_data->fabric_actual_soft_min_freq); - } - return 0; } @@ -518,17 +484,161 @@ static int rv_hwmgr_backend_fini(struct pp_hwmgr *hwmgr) static int rv_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level) { + if (hwmgr->smu_version < 0x1E3700) { + pr_info("smu firmware version too old, can not set dpm level\n"); + return 0; + } + + switch (level) { + case AMD_DPM_FORCED_LEVEL_HIGH: + case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + RAVEN_UMD_PSTATE_PEAK_GFXCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinFclkByFreq, + RAVEN_UMD_PSTATE_PEAK_FCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinSocclkByFreq, + RAVEN_UMD_PSTATE_PEAK_SOCCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinVcn, + RAVEN_UMD_PSTATE_VCE); + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + RAVEN_UMD_PSTATE_PEAK_GFXCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxFclkByFreq, + RAVEN_UMD_PSTATE_PEAK_FCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxSocclkByFreq, + RAVEN_UMD_PSTATE_PEAK_SOCCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxVcn, + RAVEN_UMD_PSTATE_VCE); + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + RAVEN_UMD_PSTATE_MIN_GFXCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + RAVEN_UMD_PSTATE_MIN_GFXCLK); + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinFclkByFreq, + RAVEN_UMD_PSTATE_MIN_FCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxFclkByFreq, + RAVEN_UMD_PSTATE_MIN_FCLK); + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + RAVEN_UMD_PSTATE_GFXCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinFclkByFreq, + RAVEN_UMD_PSTATE_FCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinSocclkByFreq, + RAVEN_UMD_PSTATE_SOCCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinVcn, + RAVEN_UMD_PSTATE_VCE); + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + RAVEN_UMD_PSTATE_GFXCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxFclkByFreq, + RAVEN_UMD_PSTATE_FCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxSocclkByFreq, + RAVEN_UMD_PSTATE_SOCCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxVcn, + RAVEN_UMD_PSTATE_VCE); + break; + case AMD_DPM_FORCED_LEVEL_AUTO: + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + RAVEN_UMD_PSTATE_MIN_GFXCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinFclkByFreq, + RAVEN_UMD_PSTATE_MIN_FCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinSocclkByFreq, + RAVEN_UMD_PSTATE_MIN_SOCCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinVcn, + RAVEN_UMD_PSTATE_MIN_VCE); + + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + RAVEN_UMD_PSTATE_PEAK_GFXCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxFclkByFreq, + RAVEN_UMD_PSTATE_PEAK_FCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxSocclkByFreq, + RAVEN_UMD_PSTATE_PEAK_SOCCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxVcn, + RAVEN_UMD_PSTATE_VCE); + break; + case AMD_DPM_FORCED_LEVEL_LOW: + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + RAVEN_UMD_PSTATE_MIN_GFXCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + RAVEN_UMD_PSTATE_MIN_GFXCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinFclkByFreq, + RAVEN_UMD_PSTATE_MIN_FCLK); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxFclkByFreq, + RAVEN_UMD_PSTATE_MIN_FCLK); + break; + case AMD_DPM_FORCED_LEVEL_MANUAL: + case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: + default: + break; + } return 0; } static uint32_t rv_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low) { - return 0; + struct rv_hwmgr *data; + + if (hwmgr == NULL) + return -EINVAL; + + data = (struct rv_hwmgr *)(hwmgr->backend); + + if (low) + return data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk; + else + return data->clock_vol_info.vdd_dep_on_fclk->entries[ + data->clock_vol_info.vdd_dep_on_fclk->count - 1].clk; } static uint32_t rv_dpm_get_sclk(struct pp_hwmgr *hwmgr, bool low) { - return 0; + struct rv_hwmgr *data; + + if (hwmgr == NULL) + return -EINVAL; + + data = (struct rv_hwmgr *)(hwmgr->backend); + + if (low) + return data->gfx_min_freq_limit; + else + return data->gfx_max_freq_limit; } static int rv_dpm_patch_boot_state(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.h index 9dc50305539468929663f5887d000ba9ae5521ef..c3bc311dc59ff3e3894f5ca221faecfcc0768c9f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.h @@ -304,4 +304,19 @@ struct pp_hwmgr; int rv_init_function_pointers(struct pp_hwmgr *hwmgr); +/* UMD PState Raven Msg Parameters in MHz */ +#define RAVEN_UMD_PSTATE_GFXCLK 700 +#define RAVEN_UMD_PSTATE_SOCCLK 626 +#define RAVEN_UMD_PSTATE_FCLK 933 +#define RAVEN_UMD_PSTATE_VCE 0x03C00320 + +#define RAVEN_UMD_PSTATE_PEAK_GFXCLK 1100 +#define RAVEN_UMD_PSTATE_PEAK_SOCCLK 757 +#define RAVEN_UMD_PSTATE_PEAK_FCLK 1200 + +#define RAVEN_UMD_PSTATE_MIN_GFXCLK 200 +#define RAVEN_UMD_PSTATE_MIN_FCLK 400 +#define RAVEN_UMD_PSTATE_MIN_SOCCLK 200 +#define RAVEN_UMD_PSTATE_MIN_VCE 0x0190012C + #endif diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 8edb0c4c3876c8ca141d8603e7256c7172a3f9ae..41e42beff213905837c15db1841800b2021ec502 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2722,9 +2722,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, } } - smu7_ps->vce_clks.evclk = hwmgr->vce_arbiter.evclk; - smu7_ps->vce_clks.ecclk = hwmgr->vce_arbiter.ecclk; - cgs_get_active_displays_info(hwmgr->device, &info); minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock; @@ -2754,38 +2751,6 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, minimum_clocks.memoryClock = stable_pstate_mclk; } - if (minimum_clocks.engineClock < hwmgr->gfx_arbiter.sclk) - minimum_clocks.engineClock = hwmgr->gfx_arbiter.sclk; - - if (minimum_clocks.memoryClock < hwmgr->gfx_arbiter.mclk) - minimum_clocks.memoryClock = hwmgr->gfx_arbiter.mclk; - - smu7_ps->sclk_threshold = hwmgr->gfx_arbiter.sclk_threshold; - - if (0 != hwmgr->gfx_arbiter.sclk_over_drive) { - PP_ASSERT_WITH_CODE((hwmgr->gfx_arbiter.sclk_over_drive <= - hwmgr->platform_descriptor.overdriveLimit.engineClock), - "Overdrive sclk exceeds limit", - hwmgr->gfx_arbiter.sclk_over_drive = - hwmgr->platform_descriptor.overdriveLimit.engineClock); - - if (hwmgr->gfx_arbiter.sclk_over_drive >= hwmgr->gfx_arbiter.sclk) - smu7_ps->performance_levels[1].engine_clock = - hwmgr->gfx_arbiter.sclk_over_drive; - } - - if (0 != hwmgr->gfx_arbiter.mclk_over_drive) { - PP_ASSERT_WITH_CODE((hwmgr->gfx_arbiter.mclk_over_drive <= - hwmgr->platform_descriptor.overdriveLimit.memoryClock), - "Overdrive mclk exceeds limit", - hwmgr->gfx_arbiter.mclk_over_drive = - hwmgr->platform_descriptor.overdriveLimit.memoryClock); - - if (hwmgr->gfx_arbiter.mclk_over_drive >= hwmgr->gfx_arbiter.mclk) - smu7_ps->performance_levels[1].memory_clock = - hwmgr->gfx_arbiter.mclk_over_drive; - } - disable_mclk_switching_for_frame_lock = phm_cap_enabled( hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DisableMclkSwitchingForFrameLock); @@ -4686,6 +4651,25 @@ static int smu7_notify_cac_buffer_info(struct pp_hwmgr *hwmgr, return 0; } +static int smu7_get_max_high_clocks(struct pp_hwmgr *hwmgr, + struct amd_pp_simple_clock_info *clocks) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct smu7_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table); + struct smu7_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table); + + if (clocks == NULL) + return -EINVAL; + + clocks->memory_max_clock = mclk_table->count > 1 ? + mclk_table->dpm_levels[mclk_table->count-1].value : + mclk_table->dpm_levels[0].value; + clocks->engine_max_clock = sclk_table->count > 1 ? + sclk_table->dpm_levels[sclk_table->count-1].value : + sclk_table->dpm_levels[0].value; + return 0; +} + static const struct pp_hwmgr_func smu7_hwmgr_funcs = { .backend_init = &smu7_hwmgr_backend_init, .backend_fini = &smu7_hwmgr_backend_fini, @@ -4738,6 +4722,7 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = { .disable_smc_firmware_ctf = smu7_thermal_disable_alert, .start_thermal_controller = smu7_start_thermal_controller, .notify_cac_buffer_info = smu7_notify_cac_buffer_info, + .get_max_high_clocks = smu7_get_max_high_clocks, }; uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 07d256d136addf62d3b5bc5c22a9888dceab6050..2d55dabc77d4112d0b1c50bf1e1ae38ac0505dd0 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -426,9 +426,9 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr) data->smu_features[GNLD_VR0HOT].supported = true; smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetSmuVersion); - vega10_read_arg_from_smc(hwmgr, &(data->smu_version)); + vega10_read_arg_from_smc(hwmgr, &(hwmgr->smu_version)); /* ACG firmware has major version 5 */ - if ((data->smu_version & 0xff000000) == 0x5000000) + if ((hwmgr->smu_version & 0xff000000) == 0x5000000) data->smu_features[GNLD_ACG].supported = true; if (data->registry_data.didt_support) @@ -2879,8 +2879,8 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "DPM is already running right , skipping re-enablement!", return 0); - if ((data->smu_version == 0x001c2c00) || - (data->smu_version == 0x001c2d00)) { + if ((hwmgr->smu_version == 0x001c2c00) || + (hwmgr->smu_version == 0x001c2d00)) { tmp_result = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_UpdatePkgPwrPidAlpha, 1); PP_ASSERT_WITH_CODE(!tmp_result, @@ -3124,9 +3124,6 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, } } - vega10_ps->vce_clks.evclk = hwmgr->vce_arbiter.evclk; - vega10_ps->vce_clks.ecclk = hwmgr->vce_arbiter.ecclk; - cgs_get_active_displays_info(hwmgr->device, &info); /* result = PHM_CheckVBlankTime(hwmgr, &vblankTooShort);*/ @@ -3165,38 +3162,6 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, minimum_clocks.memoryClock = stable_pstate_mclk; } - if (minimum_clocks.engineClock < hwmgr->gfx_arbiter.sclk) - minimum_clocks.engineClock = hwmgr->gfx_arbiter.sclk; - - if (minimum_clocks.memoryClock < hwmgr->gfx_arbiter.mclk) - minimum_clocks.memoryClock = hwmgr->gfx_arbiter.mclk; - - vega10_ps->sclk_threshold = hwmgr->gfx_arbiter.sclk_threshold; - - if (hwmgr->gfx_arbiter.sclk_over_drive) { - PP_ASSERT_WITH_CODE((hwmgr->gfx_arbiter.sclk_over_drive <= - hwmgr->platform_descriptor.overdriveLimit.engineClock), - "Overdrive sclk exceeds limit", - hwmgr->gfx_arbiter.sclk_over_drive = - hwmgr->platform_descriptor.overdriveLimit.engineClock); - - if (hwmgr->gfx_arbiter.sclk_over_drive >= hwmgr->gfx_arbiter.sclk) - vega10_ps->performance_levels[1].gfx_clock = - hwmgr->gfx_arbiter.sclk_over_drive; - } - - if (hwmgr->gfx_arbiter.mclk_over_drive) { - PP_ASSERT_WITH_CODE((hwmgr->gfx_arbiter.mclk_over_drive <= - hwmgr->platform_descriptor.overdriveLimit.memoryClock), - "Overdrive mclk exceeds limit", - hwmgr->gfx_arbiter.mclk_over_drive = - hwmgr->platform_descriptor.overdriveLimit.memoryClock); - - if (hwmgr->gfx_arbiter.mclk_over_drive >= hwmgr->gfx_arbiter.mclk) - vega10_ps->performance_levels[1].mem_clock = - hwmgr->gfx_arbiter.mclk_over_drive; - } - disable_mclk_switching_for_frame_lock = phm_cap_enabled( hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DisableMclkSwitchingForFrameLock); @@ -3819,10 +3784,7 @@ static int vega10_update_sclk_threshold(struct pp_hwmgr *hwmgr) uint32_t low_sclk_interrupt_threshold = 0; if (PP_CAP(PHM_PlatformCaps_SclkThrottleLowNotification) && - (hwmgr->gfx_arbiter.sclk_threshold != - data->low_sclk_interrupt_threshold)) { - data->low_sclk_interrupt_threshold = - hwmgr->gfx_arbiter.sclk_threshold; + (data->low_sclk_interrupt_threshold != 0)) { low_sclk_interrupt_threshold = data->low_sclk_interrupt_threshold; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h index 8f7358cc3327b779a8000d052da6e6e79d622ac3..e8507ff8dbb3b08fddb72f821b2af3d69788a1a1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h @@ -387,7 +387,6 @@ struct vega10_hwmgr { struct vega10_smc_state_table smc_state_table; uint32_t config_telemetry; - uint32_t smu_version; uint32_t acg_loop_state; uint32_t mem_channels; }; diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h index 57a0467b72676c9a3a06bb79db285ec5f063700f..5716b937a6ad41e616b321762d4ebf990b3b91e9 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h @@ -437,5 +437,6 @@ extern int phm_display_clock_voltage_request(struct pp_hwmgr *hwmgr, extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks); extern int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr); +extern int phm_reset_power_profile_state(struct pp_hwmgr *hwmgr); #endif /* _HARDWARE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 004a40e88bdef05a675812e049ab805f3dbe4de0..565fe0832f415da540f2685d916dde4b270f8fc2 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -105,36 +105,6 @@ struct phm_set_power_state_input { const struct pp_hw_power_state *pnew_state; }; -struct phm_acp_arbiter { - uint32_t acpclk; -}; - -struct phm_uvd_arbiter { - uint32_t vclk; - uint32_t dclk; - uint32_t vclk_ceiling; - uint32_t dclk_ceiling; - uint32_t vclk_soft_min; - uint32_t dclk_soft_min; -}; - -struct phm_vce_arbiter { - uint32_t evclk; - uint32_t ecclk; -}; - -struct phm_gfx_arbiter { - uint32_t sclk; - uint32_t sclk_hard_min; - uint32_t mclk; - uint32_t sclk_over_drive; - uint32_t mclk_over_drive; - uint32_t sclk_threshold; - uint32_t num_cus; - uint32_t gfxclk; - uint32_t fclk; -}; - struct phm_clock_array { uint32_t count; uint32_t values[1]; @@ -722,6 +692,7 @@ enum PP_TABLE_VERSION { struct pp_hwmgr { uint32_t chip_family; uint32_t chip_id; + uint32_t smu_version; uint32_t pp_table_version; void *device; @@ -737,10 +708,6 @@ struct pp_hwmgr { enum amd_dpm_forced_level dpm_level; enum amd_dpm_forced_level saved_dpm_level; enum amd_dpm_forced_level request_dpm_level; - struct phm_gfx_arbiter gfx_arbiter; - struct phm_acp_arbiter acp_arbiter; - struct phm_uvd_arbiter uvd_arbiter; - struct phm_vce_arbiter vce_arbiter; uint32_t usec_timeout; void *pptable; struct phm_platform_descriptor platform_descriptor; diff --git a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h index 2b3497135bbd67879d3c755e8d41748b63b8a6df..f15f4df9d0a9920888575dd324bbbb89d9a3ce83 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h @@ -75,7 +75,12 @@ #define PPSMC_MSG_GetMinGfxclkFrequency 0x2C #define PPSMC_MSG_GetMaxGfxclkFrequency 0x2D #define PPSMC_MSG_SoftReset 0x2E -#define PPSMC_Message_Count 0x2F +#define PPSMC_MSG_SetSoftMaxGfxClk 0x30 +#define PPSMC_MSG_SetHardMinGfxClk 0x31 +#define PPSMC_MSG_SetSoftMaxSocclkByFreq 0x32 +#define PPSMC_MSG_SetSoftMaxFclkByFreq 0x33 +#define PPSMC_MSG_SetSoftMaxVcn 0x34 +#define PPSMC_Message_Count 0x35 typedef uint16_t PPSMC_Result; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu72.h b/drivers/gpu/drm/amd/powerplay/inc/smu72.h index 08cd70c75d8b8e55c8d19b51b680c703441685bf..9ad1cefff79fc17786cbacfd22af8b00365cc8b5 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu72.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu72.h @@ -1,4 +1,26 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + #ifndef SMU72_H #define SMU72_H diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu72_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu72_discrete.h index b2edbc0c3c4dfea4b66d3c8d5b7b5a38b24c165a..2aefbb85f62003de2e9020f5d8496642dcdebac3 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu72_discrete.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu72_discrete.h @@ -1,4 +1,26 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + #ifndef SMU72_DISCRETE_H #define SMU72_DISCRETE_H diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile index 30d3089d7dbafdc45ce9b666edb4a65916ab029d..98e701e4f55383eeee93d1a7ebd60c8d15e7b80c 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile +++ b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile @@ -1,4 +1,24 @@ -# SPDX-License-Identifier: GPL-2.0 +# +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# # # Makefile for the 'smu manager' sub-component of powerplay. # It provides the smu management services for the driver. diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index c36f00ef46f3955b904cfbf0f73ab98455f67f43..0b4a55660de47f1d5e79f4d19596717864bf6e29 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2218,10 +2218,7 @@ static int ci_update_sclk_threshold(struct pp_hwmgr *hwmgr) if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkThrottleLowNotification) - && (hwmgr->gfx_arbiter.sclk_threshold != - data->low_sclk_interrupt_threshold)) { - data->low_sclk_interrupt_threshold = - hwmgr->gfx_arbiter.sclk_threshold; + && (data->low_sclk_interrupt_threshold != 0)) { low_sclk_interrupt_threshold = data->low_sclk_interrupt_threshold; @@ -2319,6 +2316,7 @@ static int ci_load_smc_ucode(struct pp_hwmgr *hwmgr) cgs_get_firmware_info(hwmgr->device, CGS_UCODE_ID_SMU, &info); hwmgr->is_kicker = info.is_kicker; + hwmgr->smu_version = info.version; byte_count = info.image_size; src = (uint8_t *)info.kptr; start_addr = info.ucode_start_address; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c index 78ab0556e48f7c0fd2cb316d1ea606e7b802a1ce..4d3aff381bca69c0fc6950b5d621cc13de258ab2 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c @@ -709,6 +709,19 @@ static int cz_start_smu(struct pp_hwmgr *hwmgr) { int ret = 0; uint32_t fw_to_check = 0; + struct cgs_firmware_info info = {0}; + uint32_t index = SMN_MP1_SRAM_START_ADDR + + SMU8_FIRMWARE_HEADER_LOCATION + + offsetof(struct SMU8_Firmware_Header, Version); + + + if (hwmgr == NULL || hwmgr->device == NULL) + return -EINVAL; + + cgs_write_register(hwmgr->device, mmMP0PUB_IND_INDEX, index); + hwmgr->smu_version = cgs_read_register(hwmgr->device, mmMP0PUB_IND_DATA); + info.version = hwmgr->smu_version >> 8; + cgs_get_firmware_info(hwmgr->device, CGS_UCODE_ID_SMU, &info); fw_to_check = UCODE_ID_RLC_G_MASK | UCODE_ID_SDMA0_MASK | diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index f572beff197f0630b413bd948676c4519d6c1da5..085d81c8b33259a1af1b64aa171eb417b3d932a7 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -2385,10 +2385,7 @@ static int fiji_update_sclk_threshold(struct pp_hwmgr *hwmgr) if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkThrottleLowNotification) - && (hwmgr->gfx_arbiter.sclk_threshold != - data->low_sclk_interrupt_threshold)) { - data->low_sclk_interrupt_threshold = - hwmgr->gfx_arbiter.sclk_threshold; + && (data->low_sclk_interrupt_threshold != 0)) { low_sclk_interrupt_threshold = data->low_sclk_interrupt_threshold; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c index d62078681cae9d72d0414b48894f7532dfd16182..125312691f7539689e467fdcdc580bf9d45fa1a7 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c @@ -204,7 +204,7 @@ static int iceland_smu_upload_firmware_image(struct pp_hwmgr *hwmgr) pr_err("[ powerplay ] SMC address is beyond the SMC RAM area\n"); return -EINVAL; } - + hwmgr->smu_version = info.version; /* wait for smc boot up */ PHM_WAIT_INDIRECT_FIELD_UNEQUAL(hwmgr, SMC_IND, RCU_UC_EVENTS, boot_seq_done, 0); @@ -2202,10 +2202,7 @@ static int iceland_update_sclk_threshold(struct pp_hwmgr *hwmgr) if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkThrottleLowNotification) - && (hwmgr->gfx_arbiter.sclk_threshold != - data->low_sclk_interrupt_threshold)) { - data->low_sclk_interrupt_threshold = - hwmgr->gfx_arbiter.sclk_threshold; + && (data->low_sclk_interrupt_threshold != 0)) { low_sclk_interrupt_threshold = data->low_sclk_interrupt_threshold; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index bd6be7793ca79ffb187ac2d55e225ba13df2c75d..cdb47657b56774308ebb7cc88848b543da92e10b 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -2369,10 +2369,7 @@ static int polaris10_update_sclk_threshold(struct pp_hwmgr *hwmgr) if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkThrottleLowNotification) - && (hwmgr->gfx_arbiter.sclk_threshold != - data->low_sclk_interrupt_threshold)) { - data->low_sclk_interrupt_threshold = - hwmgr->gfx_arbiter.sclk_threshold; + && (data->low_sclk_interrupt_threshold != 0)) { low_sclk_interrupt_threshold = data->low_sclk_interrupt_threshold; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c index b98ade676d128b78b0f6236170281244d58199c2..2d662b44af54b6fe1bcfba18c84576e9107abcbc 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.c @@ -305,6 +305,14 @@ static int rv_smu_fini(struct pp_hwmgr *hwmgr) static int rv_start_smu(struct pp_hwmgr *hwmgr) { + struct cgs_firmware_info info = {0}; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetSmuVersion); + rv_read_arg_from_smc(hwmgr, &hwmgr->smu_version); + info.version = hwmgr->smu_version >> 8; + + cgs_get_firmware_info(hwmgr->device, CGS_UCODE_ID_SMU, &info); + if (rv_verify_smc_interface(hwmgr)) return -EINVAL; if (rv_smc_enable_sdma(hwmgr)) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h index 58888400f1b8df4ab9a384ecd782d44f9f675420..caebdbebdcd8a36d560582b991fd56e3e2a9e924 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/rv_smumgr.h @@ -40,7 +40,7 @@ struct smu_table_entry { uint32_t table_addr_high; uint32_t table_addr_low; uint8_t *table; - uint32_t handle; + unsigned long handle; }; struct smu_table_array { diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c index 7f5359a97ef2d8f4a4182b1e708a2eff071f4be0..311ff371861899c5434eced750aabf8330c814a2 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c @@ -535,7 +535,7 @@ int smu7_upload_smu_firmware_image(struct pp_hwmgr *hwmgr) smu7_convert_fw_type_to_cgs(UCODE_ID_SMU_SK), &info); hwmgr->is_kicker = info.is_kicker; - + hwmgr->smu_version = info.version; result = smu7_upload_smc_firmware_data(hwmgr, info.image_size, (uint32_t *)info.kptr, SMU7_SMC_SIZE); return result; @@ -648,6 +648,12 @@ int smu7_init(struct pp_hwmgr *hwmgr) int smu7_smu_fini(struct pp_hwmgr *hwmgr) { + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(hwmgr->smu_backend); + + smu_free_memory(hwmgr->device, (void *) smu_data->header_buffer.handle); + if (!cgs_is_virtualization_enabled(hwmgr->device)) + smu_free_memory(hwmgr->device, (void *) smu_data->smu_buffer.handle); + kfree(hwmgr->smu_backend); hwmgr->smu_backend = NULL; cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index 81b8790c0d2210740a196799bb64108718af6d97..79e5c05571bcb829b03207f836bba5c01be2c38e 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -2654,10 +2654,7 @@ static int tonga_update_sclk_threshold(struct pp_hwmgr *hwmgr) if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkThrottleLowNotification) - && (hwmgr->gfx_arbiter.sclk_threshold != - data->low_sclk_interrupt_threshold)) { - data->low_sclk_interrupt_threshold = - hwmgr->gfx_arbiter.sclk_threshold; + && (data->low_sclk_interrupt_threshold != 0)) { low_sclk_interrupt_threshold = data->low_sclk_interrupt_threshold; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h deleted file mode 100644 index eebe323c7159cc7754952a90dbd09b6f455730a1..0000000000000000000000000000000000000000 --- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h +++ /dev/null @@ -1,60 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) -#define _GPU_SCHED_TRACE_H_ - -#include -#include -#include - -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM gpu_sched -#define TRACE_INCLUDE_FILE gpu_sched_trace - -TRACE_EVENT(amd_sched_job, - TP_PROTO(struct amd_sched_job *sched_job, struct amd_sched_entity *entity), - TP_ARGS(sched_job, entity), - TP_STRUCT__entry( - __field(struct amd_sched_entity *, entity) - __field(struct dma_fence *, fence) - __field(const char *, name) - __field(uint64_t, id) - __field(u32, job_count) - __field(int, hw_job_count) - ), - - TP_fast_assign( - __entry->entity = entity; - __entry->id = sched_job->id; - __entry->fence = &sched_job->s_fence->finished; - __entry->name = sched_job->sched->name; - __entry->job_count = spsc_queue_count(&entity->job_queue); - __entry->hw_job_count = atomic_read( - &sched_job->sched->hw_rq_count); - ), - TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", - __entry->entity, __entry->id, - __entry->fence, __entry->name, - __entry->job_count, __entry->hw_job_count) -); - -TRACE_EVENT(amd_sched_process_job, - TP_PROTO(struct amd_sched_fence *fence), - TP_ARGS(fence), - TP_STRUCT__entry( - __field(struct dma_fence *, fence) - ), - - TP_fast_assign( - __entry->fence = &fence->finished; - ), - TP_printk("fence=%p signaled", __entry->fence) -); - -#endif - -/* This part must be outside protection */ -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#include diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h deleted file mode 100644 index b590fcc2786a5da7b9ad2345b5d12b5f0055e762..0000000000000000000000000000000000000000 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef _GPU_SCHEDULER_H_ -#define _GPU_SCHEDULER_H_ - -#include -#include -#include "spsc_queue.h" - -struct amd_gpu_scheduler; -struct amd_sched_rq; - -enum amd_sched_priority { - AMD_SCHED_PRIORITY_MIN, - AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN, - AMD_SCHED_PRIORITY_NORMAL, - AMD_SCHED_PRIORITY_HIGH_SW, - AMD_SCHED_PRIORITY_HIGH_HW, - AMD_SCHED_PRIORITY_KERNEL, - AMD_SCHED_PRIORITY_MAX, - AMD_SCHED_PRIORITY_INVALID = -1, - AMD_SCHED_PRIORITY_UNSET = -2 -}; - - -/** - * A scheduler entity is a wrapper around a job queue or a group - * of other entities. Entities take turns emitting jobs from their - * job queues to corresponding hardware ring based on scheduling - * policy. -*/ -struct amd_sched_entity { - struct list_head list; - struct amd_sched_rq *rq; - spinlock_t rq_lock; - struct amd_gpu_scheduler *sched; - - spinlock_t queue_lock; - struct spsc_queue job_queue; - - atomic_t fence_seq; - uint64_t fence_context; - - struct dma_fence *dependency; - struct dma_fence_cb cb; - atomic_t *guilty; /* points to ctx's guilty */ -}; - -/** - * Run queue is a set of entities scheduling command submissions for - * one specific ring. It implements the scheduling policy that selects - * the next entity to emit commands from. -*/ -struct amd_sched_rq { - spinlock_t lock; - struct list_head entities; - struct amd_sched_entity *current_entity; -}; - -struct amd_sched_fence { - struct dma_fence scheduled; - struct dma_fence finished; - struct dma_fence_cb cb; - struct dma_fence *parent; - struct amd_gpu_scheduler *sched; - spinlock_t lock; - void *owner; -}; - -struct amd_sched_job { - struct spsc_node queue_node; - struct amd_gpu_scheduler *sched; - struct amd_sched_fence *s_fence; - struct dma_fence_cb finish_cb; - struct work_struct finish_work; - struct list_head node; - struct delayed_work work_tdr; - uint64_t id; - atomic_t karma; - enum amd_sched_priority s_priority; -}; - -extern const struct dma_fence_ops amd_sched_fence_ops_scheduled; -extern const struct dma_fence_ops amd_sched_fence_ops_finished; -static inline struct amd_sched_fence *to_amd_sched_fence(struct dma_fence *f) -{ - if (f->ops == &amd_sched_fence_ops_scheduled) - return container_of(f, struct amd_sched_fence, scheduled); - - if (f->ops == &amd_sched_fence_ops_finished) - return container_of(f, struct amd_sched_fence, finished); - - return NULL; -} - -static inline bool amd_sched_invalidate_job(struct amd_sched_job *s_job, int threshold) -{ - return (s_job && atomic_inc_return(&s_job->karma) > threshold); -} - -/** - * Define the backend operations called by the scheduler, - * these functions should be implemented in driver side -*/ -struct amd_sched_backend_ops { - struct dma_fence *(*dependency)(struct amd_sched_job *sched_job, - struct amd_sched_entity *s_entity); - struct dma_fence *(*run_job)(struct amd_sched_job *sched_job); - void (*timedout_job)(struct amd_sched_job *sched_job); - void (*free_job)(struct amd_sched_job *sched_job); -}; - -/** - * One scheduler is implemented for each hardware ring -*/ -struct amd_gpu_scheduler { - const struct amd_sched_backend_ops *ops; - uint32_t hw_submission_limit; - long timeout; - const char *name; - struct amd_sched_rq sched_rq[AMD_SCHED_PRIORITY_MAX]; - wait_queue_head_t wake_up_worker; - wait_queue_head_t job_scheduled; - atomic_t hw_rq_count; - atomic64_t job_id_count; - struct task_struct *thread; - struct list_head ring_mirror_list; - spinlock_t job_list_lock; - int hang_limit; -}; - -int amd_sched_init(struct amd_gpu_scheduler *sched, - const struct amd_sched_backend_ops *ops, - uint32_t hw_submission, unsigned hang_limit, long timeout, const char *name); -void amd_sched_fini(struct amd_gpu_scheduler *sched); - -int amd_sched_entity_init(struct amd_gpu_scheduler *sched, - struct amd_sched_entity *entity, - struct amd_sched_rq *rq, - uint32_t jobs, atomic_t* guilty); -void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, - struct amd_sched_entity *entity); -void amd_sched_entity_push_job(struct amd_sched_job *sched_job, - struct amd_sched_entity *entity); -void amd_sched_entity_set_rq(struct amd_sched_entity *entity, - struct amd_sched_rq *rq); - -int amd_sched_fence_slab_init(void); -void amd_sched_fence_slab_fini(void); - -struct amd_sched_fence *amd_sched_fence_create( - struct amd_sched_entity *s_entity, void *owner); -void amd_sched_fence_scheduled(struct amd_sched_fence *fence); -void amd_sched_fence_finished(struct amd_sched_fence *fence); -int amd_sched_job_init(struct amd_sched_job *job, - struct amd_gpu_scheduler *sched, - struct amd_sched_entity *entity, - void *owner); -void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *job); -void amd_sched_job_recovery(struct amd_gpu_scheduler *sched); -bool amd_sched_dependency_optimized(struct dma_fence* fence, - struct amd_sched_entity *entity); -void amd_sched_job_kickout(struct amd_sched_job *s_job); - -#endif diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index e080e31a8513a0b74c5b9150be2fa0f293df7d49..3d82712d80028cffdfab2bc93e03960c683ea2c4 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -24,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -183,13 +183,6 @@ static int malidp_set_and_wait_config_valid(struct drm_device *drm) return (ret > 0) ? 0 : -ETIMEDOUT; } -static void malidp_output_poll_changed(struct drm_device *drm) -{ - struct malidp_drm *malidp = drm->dev_private; - - drm_fbdev_cma_hotplug_event(malidp->fbdev); -} - static void malidp_atomic_commit_hw_done(struct drm_atomic_state *state) { struct drm_pending_vblank_event *event; @@ -252,7 +245,7 @@ static const struct drm_mode_config_helper_funcs malidp_mode_config_helpers = { static const struct drm_mode_config_funcs malidp_mode_config_funcs = { .fb_create = drm_gem_fb_create, - .output_poll_changed = malidp_output_poll_changed, + .output_poll_changed = drm_fb_helper_output_poll_changed, .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; @@ -317,19 +310,12 @@ static int malidp_irq_init(struct platform_device *pdev) return 0; } -static void malidp_lastclose(struct drm_device *drm) -{ - struct malidp_drm *malidp = drm->dev_private; - - drm_fbdev_cma_restore_mode(malidp->fbdev); -} - DEFINE_DRM_GEM_CMA_FOPS(fops); static struct drm_driver malidp_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_PRIME, - .lastclose = malidp_lastclose, + .lastclose = drm_fb_helper_lastclose, .gem_free_object_unlocked = drm_gem_cma_free_object, .gem_vm_ops = &drm_gem_cma_vm_ops, .dumb_create = drm_gem_cma_dumb_create, @@ -623,14 +609,9 @@ static int malidp_bind(struct device *dev) drm_mode_config_reset(drm); - malidp->fbdev = drm_fbdev_cma_init(drm, 32, - drm->mode_config.num_connector); - - if (IS_ERR(malidp->fbdev)) { - ret = PTR_ERR(malidp->fbdev); - malidp->fbdev = NULL; + ret = drm_fb_cma_fbdev_init(drm, 32, 0); + if (ret) goto fbdev_fail; - } drm_kms_helper_poll_init(drm); @@ -641,10 +622,7 @@ static int malidp_bind(struct device *dev) return 0; register_fail: - if (malidp->fbdev) { - drm_fbdev_cma_fini(malidp->fbdev); - malidp->fbdev = NULL; - } + drm_fb_cma_fbdev_fini(drm); drm_kms_helper_poll_fini(drm); fbdev_fail: pm_runtime_get_sync(dev); @@ -681,10 +659,7 @@ static void malidp_unbind(struct device *dev) struct malidp_drm *malidp = drm->dev_private; drm_dev_unregister(drm); - if (malidp->fbdev) { - drm_fbdev_cma_fini(malidp->fbdev); - malidp->fbdev = NULL; - } + drm_fb_cma_fbdev_fini(drm); drm_kms_helper_poll_fini(drm); pm_runtime_get_sync(dev); malidp_se_irq_fini(drm); diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h index 70ed6aeccf052192c50c3b9f48f3c135d112e786..e0d12c9fc6b8093c4a0aaa8f7bacf0798dea2e11 100644 --- a/drivers/gpu/drm/arm/malidp_drv.h +++ b/drivers/gpu/drm/arm/malidp_drv.h @@ -20,7 +20,6 @@ struct malidp_drm { struct malidp_hw_device *dev; - struct drm_fbdev_cma *fbdev; struct drm_crtc crtc; wait_queue_head_t wq; atomic_t config_valid; diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 2e065facdce74cae05cdc7b86fdcf9643d886ba0..e2adfbef7d6b631b74acd031d8bf53ea5213ad2c 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "armada_crtc.h" #include "armada_drm.h" #include "armada_fb.h" @@ -20,13 +21,6 @@ #include "armada_hw.h" #include "armada_trace.h" -struct armada_frame_work { - struct armada_plane_work work; - struct drm_pending_vblank_event *event; - struct armada_regs regs[4]; - struct drm_framebuffer *old_fb; -}; - enum csc_mode { CSC_AUTO = 0, CSC_YUV_CCIR601 = 1, @@ -168,16 +162,23 @@ static void armada_drm_crtc_update(struct armada_crtc *dcrtc) void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb, int x, int y) { + const struct drm_format_info *format = fb->format; + unsigned int num_planes = format->num_planes; u32 addr = drm_fb_obj(fb)->dev_addr; - int num_planes = fb->format->num_planes; int i; if (num_planes > 3) num_planes = 3; - for (i = 0; i < num_planes; i++) + addrs[0] = addr + fb->offsets[0] + y * fb->pitches[0] + + x * format->cpp[0]; + + y /= format->vsub; + x /= format->hsub; + + for (i = 1; i < num_planes; i++) addrs[i] = addr + fb->offsets[i] + y * fb->pitches[i] + - x * fb->format->cpp[i]; + x * format->cpp[i]; for (; i < 3; i++) addrs[i] = 0; } @@ -209,6 +210,38 @@ static unsigned armada_drm_crtc_calc_fb(struct drm_framebuffer *fb, return i; } +static void armada_drm_plane_work_call(struct armada_crtc *dcrtc, + struct armada_plane_work *work, + void (*fn)(struct armada_crtc *, struct armada_plane_work *)) +{ + struct armada_plane *dplane = drm_to_armada_plane(work->plane); + struct drm_pending_vblank_event *event; + struct drm_framebuffer *fb; + + if (fn) + fn(dcrtc, work); + drm_crtc_vblank_put(&dcrtc->crtc); + + event = work->event; + fb = work->old_fb; + if (event || fb) { + struct drm_device *dev = dcrtc->crtc.dev; + unsigned long flags; + + spin_lock_irqsave(&dev->event_lock, flags); + if (event) + drm_crtc_send_vblank_event(&dcrtc->crtc, event); + if (fb) + __armada_drm_queue_unref_work(dev, fb); + spin_unlock_irqrestore(&dev->event_lock, flags); + } + + if (work->need_kfree) + kfree(work); + + wake_up(&dplane->frame_wait); +} + static void armada_drm_plane_work_run(struct armada_crtc *dcrtc, struct drm_plane *plane) { @@ -216,24 +249,19 @@ static void armada_drm_plane_work_run(struct armada_crtc *dcrtc, struct armada_plane_work *work = xchg(&dplane->work, NULL); /* Handle any pending frame work. */ - if (work) { - work->fn(dcrtc, dplane, work); - drm_crtc_vblank_put(&dcrtc->crtc); - } - - wake_up(&dplane->frame_wait); + if (work) + armada_drm_plane_work_call(dcrtc, work, work->fn); } int armada_drm_plane_work_queue(struct armada_crtc *dcrtc, - struct armada_plane *plane, struct armada_plane_work *work) + struct armada_plane_work *work) { + struct armada_plane *plane = drm_to_armada_plane(work->plane); int ret; ret = drm_crtc_vblank_get(&dcrtc->crtc); - if (ret) { - DRM_ERROR("failed to acquire vblank counter\n"); + if (ret) return ret; - } ret = cmpxchg(&plane->work, NULL, work) ? -EBUSY : 0; if (ret) @@ -247,51 +275,60 @@ int armada_drm_plane_work_wait(struct armada_plane *plane, long timeout) return wait_event_timeout(plane->frame_wait, !plane->work, timeout); } -struct armada_plane_work *armada_drm_plane_work_cancel( - struct armada_crtc *dcrtc, struct armada_plane *plane) +void armada_drm_plane_work_cancel(struct armada_crtc *dcrtc, + struct armada_plane *dplane) { - struct armada_plane_work *work = xchg(&plane->work, NULL); + struct armada_plane_work *work = xchg(&dplane->work, NULL); if (work) - drm_crtc_vblank_put(&dcrtc->crtc); - - return work; + armada_drm_plane_work_call(dcrtc, work, work->cancel); } -static int armada_drm_crtc_queue_frame_work(struct armada_crtc *dcrtc, - struct armada_frame_work *work) +static void armada_drm_crtc_complete_frame_work(struct armada_crtc *dcrtc, + struct armada_plane_work *work) { - struct armada_plane *plane = drm_to_armada_plane(dcrtc->crtc.primary); + unsigned long flags; - return armada_drm_plane_work_queue(dcrtc, plane, &work->work); + spin_lock_irqsave(&dcrtc->irq_lock, flags); + armada_drm_crtc_update_regs(dcrtc, work->regs); + spin_unlock_irqrestore(&dcrtc->irq_lock, flags); } -static void armada_drm_crtc_complete_frame_work(struct armada_crtc *dcrtc, - struct armada_plane *plane, struct armada_plane_work *work) +static void armada_drm_crtc_complete_disable_work(struct armada_crtc *dcrtc, + struct armada_plane_work *work) { - struct armada_frame_work *fwork = container_of(work, struct armada_frame_work, work); - struct drm_device *dev = dcrtc->crtc.dev; unsigned long flags; + if (dcrtc->plane == work->plane) + dcrtc->plane = NULL; + spin_lock_irqsave(&dcrtc->irq_lock, flags); - armada_drm_crtc_update_regs(dcrtc, fwork->regs); + armada_drm_crtc_update_regs(dcrtc, work->regs); spin_unlock_irqrestore(&dcrtc->irq_lock, flags); +} - if (fwork->event) { - spin_lock_irqsave(&dev->event_lock, flags); - drm_crtc_send_vblank_event(&dcrtc->crtc, fwork->event); - spin_unlock_irqrestore(&dev->event_lock, flags); - } +static struct armada_plane_work * +armada_drm_crtc_alloc_plane_work(struct drm_plane *plane) +{ + struct armada_plane_work *work; + int i = 0; + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return NULL; - /* Finally, queue the process-half of the cleanup. */ - __armada_drm_queue_unref_work(dcrtc->crtc.dev, fwork->old_fb); - kfree(fwork); + work->plane = plane; + work->fn = armada_drm_crtc_complete_frame_work; + work->need_kfree = true; + armada_reg_queue_end(work->regs, i); + + return work; } static void armada_drm_crtc_finish_fb(struct armada_crtc *dcrtc, struct drm_framebuffer *fb, bool force) { - struct armada_frame_work *work; + struct armada_plane_work *work; if (!fb) return; @@ -302,15 +339,11 @@ static void armada_drm_crtc_finish_fb(struct armada_crtc *dcrtc, return; } - work = kmalloc(sizeof(*work), GFP_KERNEL); + work = armada_drm_crtc_alloc_plane_work(dcrtc->crtc.primary); if (work) { - int i = 0; - work->work.fn = armada_drm_crtc_complete_frame_work; - work->event = NULL; work->old_fb = fb; - armada_reg_queue_end(work->regs, i); - if (armada_drm_crtc_queue_frame_work(dcrtc, work) == 0) + if (armada_drm_plane_work_queue(dcrtc, work) == 0) return; kfree(work); @@ -373,8 +406,11 @@ static void armada_drm_crtc_prepare(struct drm_crtc *crtc) * the new mode parameters. */ plane = dcrtc->plane; - if (plane) + if (plane) { drm_plane_force_disable(plane); + WARN_ON(!armada_drm_plane_work_wait(drm_to_armada_plane(plane), + HZ)); + } } /* The mode_config.mutex will be held for this call */ @@ -440,11 +476,11 @@ static void armada_drm_crtc_irq(struct armada_crtc *dcrtc, u32 stat) if (stat & VSYNC_IRQ) drm_crtc_handle_vblank(&dcrtc->crtc); - spin_lock(&dcrtc->irq_lock); ovl_plane = dcrtc->plane; if (ovl_plane) armada_drm_plane_work_run(dcrtc, ovl_plane); + spin_lock(&dcrtc->irq_lock); if (stat & GRA_FRAME_IRQ && dcrtc->interlaced) { int i = stat & GRA_FRAME_IRQ0 ? 0 : 1; uint32_t val; @@ -536,18 +572,14 @@ static uint32_t armada_drm_crtc_calculate_csc(struct armada_crtc *dcrtc) return val; } -static void armada_drm_primary_set(struct drm_crtc *crtc, - struct drm_plane *plane, int x, int y) +static void armada_drm_gra_plane_regs(struct armada_regs *regs, + struct drm_framebuffer *fb, struct armada_plane_state *state, + int x, int y, bool interlaced) { - struct armada_plane_state *state = &drm_to_armada_plane(plane)->state; - struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); - struct armada_regs regs[8]; - bool interlaced = dcrtc->interlaced; - unsigned i; + unsigned int i; u32 ctrl0; - i = armada_drm_crtc_calc_fb(plane->fb, x, y, regs, interlaced); - + i = armada_drm_crtc_calc_fb(fb, x, y, regs, interlaced); armada_reg_queue_set(regs, i, state->dst_yx, LCD_SPU_GRA_OVSA_HPXL_VLN); armada_reg_queue_set(regs, i, state->src_hw, LCD_SPU_GRA_HPXL_VLN); armada_reg_queue_set(regs, i, state->dst_hw, LCD_SPU_GZM_HPXL_VLN); @@ -559,9 +591,21 @@ static void armada_drm_primary_set(struct drm_crtc *crtc, armada_reg_queue_mod(regs, i, ctrl0, CFG_GRAFORMAT | CFG_GRA_MOD(CFG_SWAPRB | CFG_SWAPUV | CFG_SWAPYU | CFG_YUV2RGB) | - CFG_PALETTE_ENA | CFG_GRA_FTOGGLE, + CFG_PALETTE_ENA | CFG_GRA_FTOGGLE | + CFG_GRA_HSMOOTH | CFG_GRA_ENA, LCD_SPU_DMA_CTRL0); armada_reg_queue_end(regs, i); +} + +static void armada_drm_primary_set(struct drm_crtc *crtc, + struct drm_plane *plane, int x, int y) +{ + struct armada_plane_state *state = &drm_to_armada_plane(plane)->state; + struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); + struct armada_regs regs[8]; + bool interlaced = dcrtc->interlaced; + + armada_drm_gra_plane_regs(regs, plane->fb, state, x, y, interlaced); armada_drm_crtc_update_regs(dcrtc, regs); } @@ -581,7 +625,7 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc, interlaced = !!(adj->flags & DRM_MODE_FLAG_INTERLACE); - val = CFG_GRA_ENA | CFG_GRA_HSMOOTH; + val = CFG_GRA_ENA; val |= CFG_GRA_FMT(drm_fb_to_armada_fb(dcrtc->crtc.primary->fb)->fmt); val |= CFG_GRA_MOD(drm_fb_to_armada_fb(dcrtc->crtc.primary->fb)->mod); @@ -633,8 +677,6 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc, /* Now compute the divider for real */ dcrtc->variant->compute_clock(dcrtc, adj, &sclk); - /* Ensure graphic fifo is enabled */ - armada_reg_queue_mod(regs, i, 0, CFG_PDWN64x66, LCD_SPU_SRAM_PARA1); armada_reg_queue_set(regs, i, sclk, LCD_CFG_SCLK_DIV); if (interlaced ^ dcrtc->interlaced) { @@ -647,6 +689,9 @@ static int armada_drm_crtc_mode_set(struct drm_crtc *crtc, spin_lock_irqsave(&dcrtc->irq_lock, flags); + /* Ensure graphic fifo is enabled */ + armada_reg_queue_mod(regs, i, 0, CFG_PDWN64x66, LCD_SPU_SRAM_PARA1); + /* Even interlaced/progressive frame */ dcrtc->v[1].spu_v_h_total = adj->crtc_vtotal << 16 | adj->crtc_htotal; @@ -729,47 +774,13 @@ static int armada_drm_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, return 0; } -void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc, - struct drm_plane *plane) -{ - u32 sram_para1, dma_ctrl0_mask; - - /* - * Drop our reference on any framebuffer attached to this plane. - * We don't need to NULL this out as drm_plane_force_disable(), - * and __setplane_internal() will do so for an overlay plane, and - * __drm_helper_disable_unused_functions() will do so for the - * primary plane. - */ - if (plane->fb) - drm_framebuffer_put(plane->fb); - - /* Power down the Y/U/V FIFOs */ - sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66; - - /* Power down most RAMs and FIFOs if this is the primary plane */ - if (plane->type == DRM_PLANE_TYPE_PRIMARY) { - sram_para1 |= CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 | - CFG_PDWN32x32 | CFG_PDWN64x66; - dma_ctrl0_mask = CFG_GRA_ENA; - } else { - dma_ctrl0_mask = CFG_DMA_ENA; - } - - spin_lock_irq(&dcrtc->irq_lock); - armada_updatel(0, dma_ctrl0_mask, dcrtc->base + LCD_SPU_DMA_CTRL0); - spin_unlock_irq(&dcrtc->irq_lock); - - armada_updatel(sram_para1, 0, dcrtc->base + LCD_SPU_SRAM_PARA1); -} - /* The mode_config.mutex will be held for this call */ static void armada_drm_crtc_disable(struct drm_crtc *crtc) { - struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); - armada_drm_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); - armada_drm_crtc_plane_disable(dcrtc, crtc->primary); + + /* Disable our primary plane when we disable the CRTC. */ + crtc->primary->funcs->disable_plane(crtc->primary, NULL); } static const struct drm_crtc_helper_funcs armada_crtc_helper_funcs = { @@ -879,9 +890,11 @@ static int armada_drm_crtc_cursor_update(struct armada_crtc *dcrtc, bool reload) return 0; } + spin_lock_irq(&dcrtc->irq_lock); para1 = readl_relaxed(dcrtc->base + LCD_SPU_SRAM_PARA1); armada_updatel(CFG_CSB_256x32, CFG_CSB_256x32 | CFG_PDWN256x32, dcrtc->base + LCD_SPU_SRAM_PARA1); + spin_unlock_irq(&dcrtc->irq_lock); /* * Initialize the transparency if the SRAM was powered down. @@ -1021,7 +1034,7 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); - struct armada_frame_work *work; + struct armada_plane_work *work; unsigned i; int ret; @@ -1029,11 +1042,10 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc, if (fb->format != crtc->primary->fb->format) return -EINVAL; - work = kmalloc(sizeof(*work), GFP_KERNEL); + work = armada_drm_crtc_alloc_plane_work(dcrtc->crtc.primary); if (!work) return -ENOMEM; - work->work.fn = armada_drm_crtc_complete_frame_work; work->event = event; work->old_fb = dcrtc->crtc.primary->fb; @@ -1047,7 +1059,7 @@ static int armada_drm_crtc_page_flip(struct drm_crtc *crtc, */ drm_framebuffer_get(fb); - ret = armada_drm_crtc_queue_frame_work(dcrtc, work); + ret = armada_drm_plane_work_queue(dcrtc, work); if (ret) { /* Undo our reference above */ drm_framebuffer_put(fb); @@ -1127,14 +1139,195 @@ static const struct drm_crtc_funcs armada_crtc_funcs = { .disable_vblank = armada_drm_crtc_disable_vblank, }; +static void armada_drm_primary_update_state(struct drm_plane_state *state, + struct armada_regs *regs) +{ + struct armada_plane *dplane = drm_to_armada_plane(state->plane); + struct armada_crtc *dcrtc = drm_to_armada_crtc(state->crtc); + struct armada_framebuffer *dfb = drm_fb_to_armada_fb(state->fb); + bool was_disabled; + unsigned int idx = 0; + u32 val; + + val = CFG_GRA_FMT(dfb->fmt) | CFG_GRA_MOD(dfb->mod); + if (dfb->fmt > CFG_420) + val |= CFG_PALETTE_ENA; + if (state->visible) + val |= CFG_GRA_ENA; + if (drm_rect_width(&state->src) >> 16 != drm_rect_width(&state->dst)) + val |= CFG_GRA_HSMOOTH; + + was_disabled = !(dplane->state.ctrl0 & CFG_GRA_ENA); + if (was_disabled) + armada_reg_queue_mod(regs, idx, + 0, CFG_PDWN64x66, LCD_SPU_SRAM_PARA1); + + dplane->state.ctrl0 = val; + dplane->state.src_hw = (drm_rect_height(&state->src) & 0xffff0000) | + drm_rect_width(&state->src) >> 16; + dplane->state.dst_hw = drm_rect_height(&state->dst) << 16 | + drm_rect_width(&state->dst); + dplane->state.dst_yx = state->dst.y1 << 16 | state->dst.x1; + + armada_drm_gra_plane_regs(regs + idx, &dfb->fb, &dplane->state, + state->src.x1 >> 16, state->src.y1 >> 16, + dcrtc->interlaced); + + dplane->state.vsync_update = !was_disabled; + dplane->state.changed = true; +} + +static int armada_drm_primary_update(struct drm_plane *plane, + struct drm_crtc *crtc, struct drm_framebuffer *fb, + int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, + uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h, + struct drm_modeset_acquire_ctx *ctx) +{ + struct armada_plane *dplane = drm_to_armada_plane(plane); + struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); + struct armada_plane_work *work; + struct drm_plane_state state = { + .plane = plane, + .crtc = crtc, + .fb = fb, + .src_x = src_x, + .src_y = src_y, + .src_w = src_w, + .src_h = src_h, + .crtc_x = crtc_x, + .crtc_y = crtc_y, + .crtc_w = crtc_w, + .crtc_h = crtc_h, + .rotation = DRM_MODE_ROTATE_0, + }; + const struct drm_rect clip = { + .x2 = crtc->mode.hdisplay, + .y2 = crtc->mode.vdisplay, + }; + int ret; + + ret = drm_atomic_helper_check_plane_state(&state, crtc->state, &clip, 0, + INT_MAX, true, false); + if (ret) + return ret; + + work = &dplane->works[dplane->next_work]; + work->fn = armada_drm_crtc_complete_frame_work; + + if (plane->fb != fb) { + /* + * Take a reference on the new framebuffer - we want to + * hold on to it while the hardware is displaying it. + */ + drm_framebuffer_reference(fb); + + work->old_fb = plane->fb; + } else { + work->old_fb = NULL; + } + + armada_drm_primary_update_state(&state, work->regs); + + if (!dplane->state.changed) + return 0; + + /* Wait for pending work to complete */ + if (armada_drm_plane_work_wait(dplane, HZ / 10) == 0) + armada_drm_plane_work_cancel(dcrtc, dplane); + + if (!dplane->state.vsync_update) { + work->fn(dcrtc, work); + if (work->old_fb) + drm_framebuffer_unreference(work->old_fb); + return 0; + } + + /* Queue it for update on the next interrupt if we are enabled */ + ret = armada_drm_plane_work_queue(dcrtc, work); + if (ret) { + work->fn(dcrtc, work); + if (work->old_fb) + drm_framebuffer_unreference(work->old_fb); + } + + dplane->next_work = !dplane->next_work; + + return 0; +} + +int armada_drm_plane_disable(struct drm_plane *plane, + struct drm_modeset_acquire_ctx *ctx) +{ + struct armada_plane *dplane = drm_to_armada_plane(plane); + struct armada_crtc *dcrtc; + struct armada_plane_work *work; + unsigned int idx = 0; + u32 sram_para1, enable_mask; + + if (!plane->crtc) + return 0; + + /* + * Arrange to power down most RAMs and FIFOs if this is the primary + * plane, otherwise just the YUV FIFOs for the overlay plane. + */ + if (plane->type == DRM_PLANE_TYPE_PRIMARY) { + sram_para1 = CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 | + CFG_PDWN32x32 | CFG_PDWN64x66; + enable_mask = CFG_GRA_ENA; + } else { + sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66; + enable_mask = CFG_DMA_ENA; + } + + dplane->state.ctrl0 &= ~enable_mask; + + dcrtc = drm_to_armada_crtc(plane->crtc); + + /* + * Try to disable the plane and drop our ref on the framebuffer + * at the next frame update. If we fail for any reason, disable + * the plane immediately. + */ + work = &dplane->works[dplane->next_work]; + work->fn = armada_drm_crtc_complete_disable_work; + work->cancel = armada_drm_crtc_complete_disable_work; + work->old_fb = plane->fb; + + armada_reg_queue_mod(work->regs, idx, + 0, enable_mask, LCD_SPU_DMA_CTRL0); + armada_reg_queue_mod(work->regs, idx, + sram_para1, 0, LCD_SPU_SRAM_PARA1); + armada_reg_queue_end(work->regs, idx); + + /* Wait for any preceding work to complete, but don't wedge */ + if (WARN_ON(!armada_drm_plane_work_wait(dplane, HZ))) + armada_drm_plane_work_cancel(dcrtc, dplane); + + if (armada_drm_plane_work_queue(dcrtc, work)) { + work->fn(dcrtc, work); + if (work->old_fb) + drm_framebuffer_unreference(work->old_fb); + } + + dplane->next_work = !dplane->next_work; + + return 0; +} + static const struct drm_plane_funcs armada_primary_plane_funcs = { - .update_plane = drm_primary_helper_update, - .disable_plane = drm_primary_helper_disable, + .update_plane = armada_drm_primary_update, + .disable_plane = armada_drm_plane_disable, .destroy = drm_primary_helper_destroy, }; int armada_drm_plane_init(struct armada_plane *plane) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(plane->works); i++) + plane->works[i].plane = &plane->base; + init_waitqueue_head(&plane->frame_wait); return 0; @@ -1225,17 +1418,13 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, ret = devm_request_irq(dev, irq, armada_drm_irq, 0, "armada_drm_crtc", dcrtc); - if (ret < 0) { - kfree(dcrtc); - return ret; - } + if (ret < 0) + goto err_crtc; if (dcrtc->variant->init) { ret = dcrtc->variant->init(dcrtc, dev); - if (ret) { - kfree(dcrtc); - return ret; - } + if (ret) + goto err_crtc; } /* Ensure AXI pipeline is enabled */ @@ -1246,13 +1435,15 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, dcrtc->crtc.port = port; primary = kzalloc(sizeof(*primary), GFP_KERNEL); - if (!primary) - return -ENOMEM; + if (!primary) { + ret = -ENOMEM; + goto err_crtc; + } ret = armada_drm_plane_init(primary); if (ret) { kfree(primary); - return ret; + goto err_crtc; } ret = drm_universal_plane_init(drm, &primary->base, 0, @@ -1263,7 +1454,7 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { kfree(primary); - return ret; + goto err_crtc; } ret = drm_crtc_init_with_planes(drm, &dcrtc->crtc, &primary->base, NULL, @@ -1282,6 +1473,9 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, err_crtc_init: primary->base.funcs->destroy(&primary->base); +err_crtc: + kfree(dcrtc); + return ret; } diff --git a/drivers/gpu/drm/armada/armada_crtc.h b/drivers/gpu/drm/armada/armada_crtc.h index bab11f48357591325d5bab2f80de34981f4d5482..445829b8877af4b328c2fb8d1402a69700612c7b 100644 --- a/drivers/gpu/drm/armada/armada_crtc.h +++ b/drivers/gpu/drm/armada/armada_crtc.h @@ -36,21 +36,31 @@ struct armada_plane; struct armada_variant; struct armada_plane_work { - void (*fn)(struct armada_crtc *, - struct armada_plane *, - struct armada_plane_work *); + void (*fn)(struct armada_crtc *, struct armada_plane_work *); + void (*cancel)(struct armada_crtc *, struct armada_plane_work *); + bool need_kfree; + struct drm_plane *plane; + struct drm_framebuffer *old_fb; + struct drm_pending_vblank_event *event; + struct armada_regs regs[14]; }; struct armada_plane_state { + u16 src_x; + u16 src_y; u32 src_hw; u32 dst_hw; u32 dst_yx; u32 ctrl0; + bool changed; + bool vsync_update; }; struct armada_plane { struct drm_plane base; wait_queue_head_t frame_wait; + bool next_work; + struct armada_plane_work works[2]; struct armada_plane_work *work; struct armada_plane_state state; }; @@ -58,10 +68,10 @@ struct armada_plane { int armada_drm_plane_init(struct armada_plane *plane); int armada_drm_plane_work_queue(struct armada_crtc *dcrtc, - struct armada_plane *plane, struct armada_plane_work *work); + struct armada_plane_work *work); int armada_drm_plane_work_wait(struct armada_plane *plane, long timeout); -struct armada_plane_work *armada_drm_plane_work_cancel( - struct armada_crtc *dcrtc, struct armada_plane *plane); +void armada_drm_plane_work_cancel(struct armada_crtc *dcrtc, + struct armada_plane *plane); void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb, int x, int y); @@ -104,8 +114,8 @@ struct armada_crtc { void armada_drm_crtc_update_regs(struct armada_crtc *, struct armada_regs *); -void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc, - struct drm_plane *plane); +int armada_drm_plane_disable(struct drm_plane *plane, + struct drm_modeset_acquire_ctx *ctx); extern struct platform_driver armada_lcd_platform_driver; diff --git a/drivers/gpu/drm/armada/armada_drm.h b/drivers/gpu/drm/armada/armada_drm.h index b064879ecdbd1073a379dbb438854094cdd30f80..cc4c557c9f66421737c77bd4dd92e977ab4fc75a 100644 --- a/drivers/gpu/drm/armada/armada_drm.h +++ b/drivers/gpu/drm/armada/armada_drm.h @@ -84,7 +84,6 @@ void armada_drm_queue_unref_work(struct drm_device *, extern const struct drm_mode_config_funcs armada_drm_mode_config_funcs; int armada_fbdev_init(struct drm_device *); -void armada_fbdev_lastclose(struct drm_device *); void armada_fbdev_fini(struct drm_device *); int armada_overlay_plane_create(struct drm_device *, unsigned long); diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index e857b88a9799427e701ffb8f119a5572e3e3ab72..4b11b6b52f1debe27a974e85d67e1826ba6eeb2b 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "armada_crtc.h" #include "armada_drm.h" @@ -54,15 +55,10 @@ static struct drm_ioctl_desc armada_ioctls[] = { DRM_IOCTL_DEF_DRV(ARMADA_GEM_PWRITE, armada_gem_pwrite_ioctl, 0), }; -static void armada_drm_lastclose(struct drm_device *dev) -{ - armada_fbdev_lastclose(dev); -} - DEFINE_DRM_GEM_FOPS(armada_drm_fops); static struct drm_driver armada_drm_driver = { - .lastclose = armada_drm_lastclose, + .lastclose = drm_fb_helper_lastclose, .gem_free_object_unlocked = armada_gem_free_object, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, diff --git a/drivers/gpu/drm/armada/armada_fb.c b/drivers/gpu/drm/armada/armada_fb.c index a38d5a0892a97d0e2c7debd50621fa10d6f9c704..ac92bce07ecd92aedffe35ea481df054e24175b8 100644 --- a/drivers/gpu/drm/armada/armada_fb.c +++ b/drivers/gpu/drm/armada/armada_fb.c @@ -154,16 +154,7 @@ static struct drm_framebuffer *armada_fb_create(struct drm_device *dev, return ERR_PTR(ret); } -static void armada_output_poll_changed(struct drm_device *dev) -{ - struct armada_private *priv = dev->dev_private; - struct drm_fb_helper *fbh = priv->fbdev; - - if (fbh) - drm_fb_helper_hotplug_event(fbh); -} - const struct drm_mode_config_funcs armada_drm_mode_config_funcs = { .fb_create = armada_fb_create, - .output_poll_changed = armada_output_poll_changed, + .output_poll_changed = drm_fb_helper_output_poll_changed, }; diff --git a/drivers/gpu/drm/armada/armada_fbdev.c b/drivers/gpu/drm/armada/armada_fbdev.c index a2ce83f8480009e5bdc29988da10fdff614bf6dd..2a59db0994b2e704d92edb5b0ca1b03d1888737b 100644 --- a/drivers/gpu/drm/armada/armada_fbdev.c +++ b/drivers/gpu/drm/armada/armada_fbdev.c @@ -159,14 +159,6 @@ int armada_fbdev_init(struct drm_device *dev) return ret; } -void armada_fbdev_lastclose(struct drm_device *dev) -{ - struct armada_private *priv = dev->dev_private; - - if (priv->fbdev) - drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev); -} - void armada_fbdev_fini(struct drm_device *dev) { struct armada_private *priv = dev->dev_private; diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index b411b608821a555320a16df12fe56a524a3c4dae..77b55adaa2acd8e6d2c98e553946eb4df5f42890 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -7,7 +7,7 @@ * published by the Free Software Foundation. */ #include -#include +#include #include "armada_crtc.h" #include "armada_drm.h" #include "armada_fb.h" @@ -32,11 +32,6 @@ struct armada_ovl_plane_properties { struct armada_ovl_plane { struct armada_plane base; - struct drm_framebuffer *old_fb; - struct { - struct armada_plane_work work; - struct armada_regs regs[13]; - } vbl; struct armada_ovl_plane_properties prop; }; #define drm_to_armada_ovl_plane(p) \ @@ -67,218 +62,204 @@ armada_ovl_update_attr(struct armada_ovl_plane_properties *prop, spin_unlock_irq(&dcrtc->irq_lock); } -static void armada_ovl_retire_fb(struct armada_ovl_plane *dplane, - struct drm_framebuffer *fb) -{ - struct drm_framebuffer *old_fb; - - old_fb = xchg(&dplane->old_fb, fb); - - if (old_fb) - armada_drm_queue_unref_work(dplane->base.base.dev, old_fb); -} - /* === Plane support === */ static void armada_ovl_plane_work(struct armada_crtc *dcrtc, - struct armada_plane *plane, struct armada_plane_work *work) + struct armada_plane_work *work) { - struct armada_ovl_plane *dplane = container_of(plane, struct armada_ovl_plane, base); + unsigned long flags; - trace_armada_ovl_plane_work(&dcrtc->crtc, &plane->base); + trace_armada_ovl_plane_work(&dcrtc->crtc, work->plane); - armada_drm_crtc_update_regs(dcrtc, dplane->vbl.regs); - armada_ovl_retire_fb(dplane, NULL); + spin_lock_irqsave(&dcrtc->irq_lock, flags); + armada_drm_crtc_update_regs(dcrtc, work->regs); + spin_unlock_irqrestore(&dcrtc->irq_lock, flags); } -static int -armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int crtc_x, int crtc_y, unsigned crtc_w, unsigned crtc_h, - uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx) +static void armada_ovl_plane_update_state(struct drm_plane_state *state, + struct armada_regs *regs) { - struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane); - struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); - struct drm_rect src = { - .x1 = src_x, - .y1 = src_y, - .x2 = src_x + src_w, - .y2 = src_y + src_h, - }; - struct drm_rect dest = { - .x1 = crtc_x, - .y1 = crtc_y, - .x2 = crtc_x + crtc_w, - .y2 = crtc_y + crtc_h, - }; - const struct drm_rect clip = { - .x2 = crtc->mode.hdisplay, - .y2 = crtc->mode.vdisplay, - }; - uint32_t val, ctrl0; - unsigned idx = 0; - bool visible; - int ret; - - trace_armada_ovl_plane_update(plane, crtc, fb, - crtc_x, crtc_y, crtc_w, crtc_h, - src_x, src_y, src_w, src_h); - - ret = drm_plane_helper_check_update(plane, crtc, fb, &src, &dest, &clip, - DRM_MODE_ROTATE_0, - 0, INT_MAX, true, false, &visible); - if (ret) - return ret; - - ctrl0 = CFG_DMA_FMT(drm_fb_to_armada_fb(fb)->fmt) | - CFG_DMA_MOD(drm_fb_to_armada_fb(fb)->mod) | - CFG_CBSH_ENA | CFG_DMA_HSMOOTH | CFG_DMA_ENA; - - /* Does the position/size result in nothing to display? */ - if (!visible) - ctrl0 &= ~CFG_DMA_ENA; - - if (!dcrtc->plane) { - dcrtc->plane = plane; - armada_ovl_update_attr(&dplane->prop, dcrtc); - } - - /* FIXME: overlay on an interlaced display */ - /* Just updating the position/size? */ - if (plane->fb == fb && dplane->base.state.ctrl0 == ctrl0) { - val = (drm_rect_height(&src) & 0xffff0000) | - drm_rect_width(&src) >> 16; - dplane->base.state.src_hw = val; - writel_relaxed(val, dcrtc->base + LCD_SPU_DMA_HPXL_VLN); - - val = drm_rect_height(&dest) << 16 | drm_rect_width(&dest); - dplane->base.state.dst_hw = val; - writel_relaxed(val, dcrtc->base + LCD_SPU_DZM_HPXL_VLN); - - val = dest.y1 << 16 | dest.x1; - dplane->base.state.dst_yx = val; - writel_relaxed(val, dcrtc->base + LCD_SPU_DMA_OVSA_HPXL_VLN); - - return 0; - } else if (~dplane->base.state.ctrl0 & ctrl0 & CFG_DMA_ENA) { + struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(state->plane); + struct armada_framebuffer *dfb = drm_fb_to_armada_fb(state->fb); + const struct drm_format_info *format; + unsigned int idx = 0; + bool fb_changed; + u32 val, ctrl0; + u16 src_x, src_y; + + ctrl0 = CFG_DMA_FMT(dfb->fmt) | CFG_DMA_MOD(dfb->mod) | CFG_CBSH_ENA; + if (state->visible) + ctrl0 |= CFG_DMA_ENA; + if (drm_rect_width(&state->src) >> 16 != drm_rect_width(&state->dst)) + ctrl0 |= CFG_DMA_HSMOOTH; + + /* + * Shifting a YUV packed format image by one pixel causes the U/V + * planes to swap. Compensate for it by also toggling the UV swap. + */ + format = dfb->fb.format; + if (format->num_planes == 1 && state->src.x1 >> 16 & (format->hsub - 1)) + ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV); + + if (~dplane->base.state.ctrl0 & ctrl0 & CFG_DMA_ENA) { /* Power up the Y/U/V FIFOs on ENA 0->1 transitions */ - armada_updatel(0, CFG_PDWN16x66 | CFG_PDWN32x66, - dcrtc->base + LCD_SPU_SRAM_PARA1); + armada_reg_queue_mod(regs, idx, + 0, CFG_PDWN16x66 | CFG_PDWN32x66, + LCD_SPU_SRAM_PARA1); } - if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0) - armada_drm_plane_work_cancel(dcrtc, &dplane->base); - - if (plane->fb != fb) { - u32 addrs[3], pixel_format; - int num_planes, hsub; - - /* - * Take a reference on the new framebuffer - we want to - * hold on to it while the hardware is displaying it. - */ - drm_framebuffer_get(fb); - - if (plane->fb) - armada_ovl_retire_fb(dplane, plane->fb); + fb_changed = dplane->base.base.fb != &dfb->fb || + dplane->base.state.src_x != state->src.x1 >> 16 || + dplane->base.state.src_y != state->src.y1 >> 16; - src_y = src.y1 >> 16; - src_x = src.x1 >> 16; + dplane->base.state.vsync_update = fb_changed; - armada_drm_plane_calc_addrs(addrs, fb, src_x, src_y); + /* FIXME: overlay on an interlaced display */ + if (fb_changed) { + u32 addrs[3]; - pixel_format = fb->format->format; - hsub = drm_format_horz_chroma_subsampling(pixel_format); - num_planes = fb->format->num_planes; + dplane->base.state.src_y = src_y = state->src.y1 >> 16; + dplane->base.state.src_x = src_x = state->src.x1 >> 16; - /* - * Annoyingly, shifting a YUYV-format image by one pixel - * causes the U/V planes to toggle. Toggle the UV swap. - * (Unfortunately, this causes momentary colour flickering.) - */ - if (src_x & (hsub - 1) && num_planes == 1) - ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV); + armada_drm_plane_calc_addrs(addrs, &dfb->fb, src_x, src_y); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0], + armada_reg_queue_set(regs, idx, addrs[0], LCD_SPU_DMA_START_ADDR_Y0); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1], + armada_reg_queue_set(regs, idx, addrs[1], LCD_SPU_DMA_START_ADDR_U0); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[2], + armada_reg_queue_set(regs, idx, addrs[2], LCD_SPU_DMA_START_ADDR_V0); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0], + armada_reg_queue_set(regs, idx, addrs[0], LCD_SPU_DMA_START_ADDR_Y1); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1], + armada_reg_queue_set(regs, idx, addrs[1], LCD_SPU_DMA_START_ADDR_U1); - armada_reg_queue_set(dplane->vbl.regs, idx, addrs[2], + armada_reg_queue_set(regs, idx, addrs[2], LCD_SPU_DMA_START_ADDR_V1); - val = fb->pitches[0] << 16 | fb->pitches[0]; - armada_reg_queue_set(dplane->vbl.regs, idx, val, + val = dfb->fb.pitches[0] << 16 | dfb->fb.pitches[0]; + armada_reg_queue_set(regs, idx, val, LCD_SPU_DMA_PITCH_YC); - val = fb->pitches[1] << 16 | fb->pitches[2]; - armada_reg_queue_set(dplane->vbl.regs, idx, val, + val = dfb->fb.pitches[1] << 16 | dfb->fb.pitches[2]; + armada_reg_queue_set(regs, idx, val, LCD_SPU_DMA_PITCH_UV); } - val = (drm_rect_height(&src) & 0xffff0000) | drm_rect_width(&src) >> 16; + val = (drm_rect_height(&state->src) & 0xffff0000) | + drm_rect_width(&state->src) >> 16; if (dplane->base.state.src_hw != val) { dplane->base.state.src_hw = val; - armada_reg_queue_set(dplane->vbl.regs, idx, val, + armada_reg_queue_set(regs, idx, val, LCD_SPU_DMA_HPXL_VLN); } - val = drm_rect_height(&dest) << 16 | drm_rect_width(&dest); + val = drm_rect_height(&state->dst) << 16 | drm_rect_width(&state->dst); if (dplane->base.state.dst_hw != val) { dplane->base.state.dst_hw = val; - armada_reg_queue_set(dplane->vbl.regs, idx, val, + armada_reg_queue_set(regs, idx, val, LCD_SPU_DZM_HPXL_VLN); } - val = dest.y1 << 16 | dest.x1; + val = state->dst.y1 << 16 | state->dst.x1; if (dplane->base.state.dst_yx != val) { dplane->base.state.dst_yx = val; - armada_reg_queue_set(dplane->vbl.regs, idx, val, + armada_reg_queue_set(regs, idx, val, LCD_SPU_DMA_OVSA_HPXL_VLN); } if (dplane->base.state.ctrl0 != ctrl0) { dplane->base.state.ctrl0 = ctrl0; - armada_reg_queue_mod(dplane->vbl.regs, idx, ctrl0, + armada_reg_queue_mod(regs, idx, ctrl0, CFG_CBSH_ENA | CFG_DMAFORMAT | CFG_DMA_FTOGGLE | CFG_DMA_HSMOOTH | CFG_DMA_TSTMODE | CFG_DMA_MOD(CFG_SWAPRB | CFG_SWAPUV | CFG_SWAPYU | CFG_YUV2RGB) | CFG_DMA_ENA, LCD_SPU_DMA_CTRL0); + dplane->base.state.vsync_update = true; } - if (idx) { - armada_reg_queue_end(dplane->vbl.regs, idx); - armada_drm_plane_work_queue(dcrtc, &dplane->base, - &dplane->vbl.work); - } - return 0; + + dplane->base.state.changed = idx != 0; + + armada_reg_queue_end(regs, idx); } -static int armada_ovl_plane_disable(struct drm_plane *plane, - struct drm_modeset_acquire_ctx *ctx) +static int +armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int crtc_x, int crtc_y, unsigned crtc_w, unsigned crtc_h, + uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h, + struct drm_modeset_acquire_ctx *ctx) { struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane); - struct drm_framebuffer *fb; - struct armada_crtc *dcrtc; + struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); + struct armada_plane_work *work; + struct drm_plane_state state = { + .plane = plane, + .crtc = crtc, + .fb = fb, + .src_x = src_x, + .src_y = src_y, + .src_w = src_w, + .src_h = src_h, + .crtc_x = crtc_x, + .crtc_y = crtc_y, + .crtc_w = crtc_w, + .crtc_h = crtc_h, + .rotation = DRM_MODE_ROTATE_0, + }; + const struct drm_rect clip = { + .x2 = crtc->mode.hdisplay, + .y2 = crtc->mode.vdisplay, + }; + int ret; - if (!dplane->base.base.crtc) + trace_armada_ovl_plane_update(plane, crtc, fb, + crtc_x, crtc_y, crtc_w, crtc_h, + src_x, src_y, src_w, src_h); + + ret = drm_atomic_helper_check_plane_state(&state, crtc->state, &clip, 0, + INT_MAX, true, false); + if (ret) + return ret; + + work = &dplane->base.works[dplane->base.next_work]; + + if (plane->fb != fb) { + /* + * Take a reference on the new framebuffer - we want to + * hold on to it while the hardware is displaying it. + */ + drm_framebuffer_reference(fb); + + work->old_fb = plane->fb; + } else { + work->old_fb = NULL; + } + + armada_ovl_plane_update_state(&state, work->regs); + + if (!dplane->base.state.changed) return 0; - dcrtc = drm_to_armada_crtc(dplane->base.base.crtc); + /* Wait for pending work to complete */ + if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0) + armada_drm_plane_work_cancel(dcrtc, &dplane->base); + + /* Just updating the position/size? */ + if (!dplane->base.state.vsync_update) { + armada_ovl_plane_work(dcrtc, work); + return 0; + } - armada_drm_plane_work_cancel(dcrtc, &dplane->base); - armada_drm_crtc_plane_disable(dcrtc, plane); + if (!dcrtc->plane) { + dcrtc->plane = plane; + armada_ovl_update_attr(&dplane->prop, dcrtc); + } - dcrtc->plane = NULL; - dplane->base.state.ctrl0 = 0; + /* Queue it for update on the next interrupt if we are enabled */ + ret = armada_drm_plane_work_queue(dcrtc, work); + if (ret) + DRM_ERROR("failed to queue plane work: %d\n", ret); - fb = xchg(&dplane->old_fb, NULL); - if (fb) - drm_framebuffer_put(fb); + dplane->base.next_work = !dplane->base.next_work; return 0; } @@ -362,7 +343,7 @@ static int armada_ovl_plane_set_property(struct drm_plane *plane, static const struct drm_plane_funcs armada_ovl_plane_funcs = { .update_plane = armada_ovl_plane_update, - .disable_plane = armada_ovl_plane_disable, + .disable_plane = armada_drm_plane_disable, .destroy = armada_ovl_plane_destroy, .set_property = armada_ovl_plane_set_property, }; @@ -454,7 +435,8 @@ int armada_overlay_plane_create(struct drm_device *dev, unsigned long crtcs) return ret; } - dplane->vbl.work.fn = armada_ovl_plane_work; + dplane->base.works[0].fn = armada_ovl_plane_work; + dplane->base.works[1].fn = armada_ovl_plane_work; ret = drm_universal_plane_init(dev, &dplane->base.base, crtcs, &armada_ovl_plane_funcs, diff --git a/drivers/gpu/drm/armada/armada_trace.h b/drivers/gpu/drm/armada/armada_trace.h index 8dbfea7a00fe328fe953dae40720e5be97c935c7..f03a56bda59615860a41a7ec0fa356a0adb8f437 100644 --- a/drivers/gpu/drm/armada/armada_trace.h +++ b/drivers/gpu/drm/armada/armada_trace.h @@ -34,14 +34,34 @@ TRACE_EVENT(armada_ovl_plane_update, __field(struct drm_plane *, plane) __field(struct drm_crtc *, crtc) __field(struct drm_framebuffer *, fb) + __field(int, crtc_x) + __field(int, crtc_y) + __field(unsigned int, crtc_w) + __field(unsigned int, crtc_h) + __field(u32, src_x) + __field(u32, src_y) + __field(u32, src_w) + __field(u32, src_h) ), TP_fast_assign( __entry->plane = plane; __entry->crtc = crtc; __entry->fb = fb; + __entry->crtc_x = crtc_x; + __entry->crtc_y = crtc_y; + __entry->crtc_w = crtc_w; + __entry->crtc_h = crtc_h; + __entry->src_x = src_x; + __entry->src_y = src_y; + __entry->src_w = src_w; + __entry->src_h = src_h; ), - TP_printk("plane %p crtc %p fb %p", - __entry->plane, __entry->crtc, __entry->fb) + TP_printk("plane %p crtc %p fb %p crtc @ (%d,%d, %ux%u) src @ (%u,%u, %ux%u)", + __entry->plane, __entry->crtc, __entry->fb, + __entry->crtc_x, __entry->crtc_y, + __entry->crtc_w, __entry->crtc_h, + __entry->src_x >> 16, __entry->src_y >> 16, + __entry->src_w >> 16, __entry->src_h >> 16) ); TRACE_EVENT(armada_ovl_plane_work, diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c index 28da7c2b7ed9dc060cacc0e67a4df45c4f1e0233..7b784d91e258e9c424fa9cad901f0de03f435080 100644 --- a/drivers/gpu/drm/ast/ast_ttm.c +++ b/drivers/gpu/drm/ast/ast_ttm.c @@ -216,9 +216,10 @@ static struct ttm_tt *ast_ttm_tt_create(struct ttm_bo_device *bdev, return tt; } -static int ast_ttm_tt_populate(struct ttm_tt *ttm) +static int ast_ttm_tt_populate(struct ttm_tt *ttm, + struct ttm_operation_ctx *ctx) { - return ttm_pool_populate(ttm); + return ttm_pool_populate(ttm, ctx); } static void ast_ttm_tt_unpopulate(struct ttm_tt *ttm) @@ -237,7 +238,6 @@ struct ttm_bo_driver ast_bo_driver = { .verify_access = ast_bo_verify_access, .io_mem_reserve = &ast_ttm_io_mem_reserve, .io_mem_free = &ast_ttm_io_mem_free, - .io_mem_pfn = ttm_bo_default_io_mem_pfn, }; int ast_mm_init(struct ast_private *ast) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c index c6e8061ffcfc181415f67a697646c77d3fbb1417..c1ea5c36b0061a9220a0c168eab6a8d588054f25 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c @@ -461,13 +461,6 @@ static struct drm_framebuffer *atmel_hlcdc_fb_create(struct drm_device *dev, return drm_gem_fb_create(dev, file_priv, mode_cmd); } -static void atmel_hlcdc_fb_output_poll_changed(struct drm_device *dev) -{ - struct atmel_hlcdc_dc *dc = dev->dev_private; - - drm_fbdev_cma_hotplug_event(dc->fbdev); -} - struct atmel_hlcdc_dc_commit { struct work_struct work; struct drm_device *dev; @@ -563,7 +556,7 @@ static int atmel_hlcdc_dc_atomic_commit(struct drm_device *dev, static const struct drm_mode_config_funcs mode_config_funcs = { .fb_create = atmel_hlcdc_fb_create, - .output_poll_changed = atmel_hlcdc_fb_output_poll_changed, + .output_poll_changed = drm_fb_helper_output_poll_changed, .atomic_check = drm_atomic_helper_check, .atomic_commit = atmel_hlcdc_dc_atomic_commit, }; @@ -665,10 +658,7 @@ static int atmel_hlcdc_dc_load(struct drm_device *dev) platform_set_drvdata(pdev, dev); - dc->fbdev = drm_fbdev_cma_init(dev, 24, - dev->mode_config.num_connector); - if (IS_ERR(dc->fbdev)) - dc->fbdev = NULL; + drm_fb_cma_fbdev_init(dev, 24, 0); drm_kms_helper_poll_init(dev); @@ -688,8 +678,7 @@ static void atmel_hlcdc_dc_unload(struct drm_device *dev) { struct atmel_hlcdc_dc *dc = dev->dev_private; - if (dc->fbdev) - drm_fbdev_cma_fini(dc->fbdev); + drm_fb_cma_fbdev_fini(dev); flush_workqueue(dc->wq); drm_kms_helper_poll_fini(dev); drm_mode_config_cleanup(dev); @@ -705,13 +694,6 @@ static void atmel_hlcdc_dc_unload(struct drm_device *dev) destroy_workqueue(dc->wq); } -static void atmel_hlcdc_dc_lastclose(struct drm_device *dev) -{ - struct atmel_hlcdc_dc *dc = dev->dev_private; - - drm_fbdev_cma_restore_mode(dc->fbdev); -} - static int atmel_hlcdc_dc_irq_postinstall(struct drm_device *dev) { struct atmel_hlcdc_dc *dc = dev->dev_private; @@ -744,7 +726,7 @@ static struct drm_driver atmel_hlcdc_dc_driver = { .driver_features = DRIVER_HAVE_IRQ | DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_ATOMIC, - .lastclose = atmel_hlcdc_dc_lastclose, + .lastclose = drm_fb_helper_lastclose, .irq_handler = atmel_hlcdc_dc_irq_handler, .irq_preinstall = atmel_hlcdc_dc_irq_uninstall, .irq_postinstall = atmel_hlcdc_dc_irq_postinstall, diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h index 6833ee253cfacafbbbf218340a3e1f8983ca805a..ab32d5b268d2d4e1ff18e1befe24e8e42f874ad0 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -374,7 +375,6 @@ struct atmel_hlcdc_dc { const struct atmel_hlcdc_dc_desc *desc; struct dma_pool *dscrpool; struct atmel_hlcdc *hlcdc; - struct drm_fbdev_cma *fbdev; struct drm_crtc *crtc; struct atmel_hlcdc_layer *layers[ATMEL_HLCDC_MAX_LAYERS]; struct workqueue_struct *wq; diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c index 8250b5e612d25f22b3a826aaf9c9c3c7a9a5f459..704e879711e4f74a7b275a76019c283a1140df42 100644 --- a/drivers/gpu/drm/bochs/bochs_mm.c +++ b/drivers/gpu/drm/bochs/bochs_mm.c @@ -205,7 +205,6 @@ struct ttm_bo_driver bochs_bo_driver = { .verify_access = bochs_bo_verify_access, .io_mem_reserve = &bochs_ttm_io_mem_reserve, .io_mem_free = &bochs_ttm_io_mem_free, - .io_mem_pfn = ttm_bo_default_io_mem_pfn, }; int bochs_mm_init(struct bochs_device *bochs) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 5dd3f1cd074a1d36f17022af23c7a23638eaa41e..a8905049b9da022802a44cb836c22fbb86f90560 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -946,7 +946,9 @@ static int analogix_dp_get_modes(struct drm_connector *connector) return 0; } + pm_runtime_get_sync(dp->dev); edid = drm_get_edid(connector, &dp->aux.ddc); + pm_runtime_put(dp->dev); if (edid) { drm_mode_connector_update_edid_property(&dp->connector, edid); diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c index 2a5b54d3a03aedb1d7b60a635f3b8cea32939338..a8e31ea07382552869b5b6c2196b4fe2260d6abe 100644 --- a/drivers/gpu/drm/cirrus/cirrus_ttm.c +++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c @@ -216,9 +216,10 @@ static struct ttm_tt *cirrus_ttm_tt_create(struct ttm_bo_device *bdev, return tt; } -static int cirrus_ttm_tt_populate(struct ttm_tt *ttm) +static int cirrus_ttm_tt_populate(struct ttm_tt *ttm, + struct ttm_operation_ctx *ctx) { - return ttm_pool_populate(ttm); + return ttm_pool_populate(ttm, ctx); } static void cirrus_ttm_tt_unpopulate(struct ttm_tt *ttm) @@ -237,7 +238,6 @@ struct ttm_bo_driver cirrus_bo_driver = { .verify_access = cirrus_bo_verify_access, .io_mem_reserve = &cirrus_ttm_io_mem_reserve, .io_mem_free = &cirrus_ttm_io_mem_free, - .io_mem_pfn = ttm_bo_default_io_mem_pfn, }; int cirrus_mm_init(struct cirrus_device *cirrus) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 37445d50816a5c2654b5b145c3c397044a429db1..b76d49218cf1de132be35d1ccb9226f8ba8a7704 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -50,7 +50,8 @@ EXPORT_SYMBOL(__drm_crtc_commit_free); * @state: atomic state * * Free all the memory allocated by drm_atomic_state_init. - * This is useful for drivers that subclass the atomic state. + * This should only be used by drivers which are still subclassing + * &drm_atomic_state and haven't switched to &drm_private_state yet. */ void drm_atomic_state_default_release(struct drm_atomic_state *state) { @@ -67,7 +68,8 @@ EXPORT_SYMBOL(drm_atomic_state_default_release); * @state: atomic state * * Default implementation for filling in a new atomic state. - * This is useful for drivers that subclass the atomic state. + * This should only be used by drivers which are still subclassing + * &drm_atomic_state and haven't switched to &drm_private_state yet. */ int drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state) @@ -132,7 +134,8 @@ EXPORT_SYMBOL(drm_atomic_state_alloc); * @state: atomic state * * Default implementation for clearing atomic state. - * This is useful for drivers that subclass the atomic state. + * This should only be used by drivers which are still subclassing + * &drm_atomic_state and haven't switched to &drm_private_state yet. */ void drm_atomic_state_default_clear(struct drm_atomic_state *state) { @@ -946,6 +949,42 @@ static void drm_atomic_plane_print_state(struct drm_printer *p, plane->funcs->atomic_print_state(p, state); } +/** + * DOC: handling driver private state + * + * Very often the DRM objects exposed to userspace in the atomic modeset api + * (&drm_connector, &drm_crtc and &drm_plane) do not map neatly to the + * underlying hardware. Especially for any kind of shared resources (e.g. shared + * clocks, scaler units, bandwidth and fifo limits shared among a group of + * planes or CRTCs, and so on) it makes sense to model these as independent + * objects. Drivers then need to do similar state tracking and commit ordering for + * such private (since not exposed to userpace) objects as the atomic core and + * helpers already provide for connectors, planes and CRTCs. + * + * To make this easier on drivers the atomic core provides some support to track + * driver private state objects using struct &drm_private_obj, with the + * associated state struct &drm_private_state. + * + * Similar to userspace-exposed objects, private state structures can be + * acquired by calling drm_atomic_get_private_obj_state(). Since this function + * does not take care of locking, drivers should wrap it for each type of + * private state object they have with the required call to drm_modeset_lock() + * for the corresponding &drm_modeset_lock. + * + * All private state structures contained in a &drm_atomic_state update can be + * iterated using for_each_oldnew_private_obj_in_state(), + * for_each_new_private_obj_in_state() and for_each_old_private_obj_in_state(). + * Drivers are recommended to wrap these for each type of driver private state + * object they have, filtering on &drm_private_obj.funcs using for_each_if(), at + * least if they want to iterate over all objects of a given type. + * + * An earlier way to handle driver private state was by subclassing struct + * &drm_atomic_state. But since that encourages non-standard ways to implement + * the check/commit split atomic requires (by using e.g. "check and rollback or + * commit instead" of "duplicate state, check, then either commit or release + * duplicated state) it is deprecated in favour of using &drm_private_state. + */ + /** * drm_atomic_private_obj_init - initialize private object * @obj: private object diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index 2e5e089dd9123fa324e1a7d343d8bb47a200739b..4c62dff14893646f7e818d1fb87947da52a3ccef 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -214,9 +214,11 @@ EXPORT_SYMBOL(drm_rotation_simplify); * This function initializes generic mutable zpos property and enables support * for it in drm core. Drivers can then attach this property to planes to enable * support for configurable planes arrangement during blending operation. - * Once mutable zpos property has been enabled, the DRM core will automatically - * calculate &drm_plane_state.normalized_zpos values. Usually min should be set - * to 0 and max to maximal number of planes for given crtc - 1. + * Drivers that attach a mutable zpos property to any plane should call the + * drm_atomic_normalize_zpos() helper during their implementation of + * &drm_mode_config_funcs.atomic_check(), which will update the normalized zpos + * values and store them in &drm_plane_state.normalized_zpos. Usually min + * should be set to 0 and max to maximal number of planes for given crtc - 1. * * If zpos of some planes cannot be changed (like fixed background or * cursor/topmost planes), driver should adjust min/max values and assign those diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 624edeb5c50d38ecedbed490c8ce05fca11fed99..e6a21e69059cdc312a6b079f75b9716345f3f4db 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -153,6 +153,25 @@ static void drm_connector_free(struct kref *kref) connector->funcs->destroy(connector); } +void drm_connector_free_work_fn(struct work_struct *work) +{ + struct drm_connector *connector, *n; + struct drm_device *dev = + container_of(work, struct drm_device, mode_config.connector_free_work); + struct drm_mode_config *config = &dev->mode_config; + unsigned long flags; + struct llist_node *freed; + + spin_lock_irqsave(&config->connector_list_lock, flags); + freed = llist_del_all(&config->connector_free_list); + spin_unlock_irqrestore(&config->connector_list_lock, flags); + + llist_for_each_entry_safe(connector, n, freed, free_node) { + drm_mode_object_unregister(dev, &connector->base); + connector->funcs->destroy(connector); + } +} + /** * drm_connector_init - Init a preallocated connector * @dev: DRM device @@ -532,6 +551,25 @@ void drm_connector_list_iter_begin(struct drm_device *dev, } EXPORT_SYMBOL(drm_connector_list_iter_begin); +/* + * Extra-safe connector put function that works in any context. Should only be + * used from the connector_iter functions, where we never really expect to + * actually release the connector when dropping our final reference. + */ +static void +__drm_connector_put_safe(struct drm_connector *conn) +{ + struct drm_mode_config *config = &conn->dev->mode_config; + + lockdep_assert_held(&config->connector_list_lock); + + if (!refcount_dec_and_test(&conn->base.refcount.refcount)) + return; + + llist_add(&conn->free_node, &config->connector_free_list); + schedule_work(&config->connector_free_work); +} + /** * drm_connector_list_iter_next - return next connector * @iter: connectr_list iterator @@ -561,10 +599,10 @@ drm_connector_list_iter_next(struct drm_connector_list_iter *iter) /* loop until it's not a zombie connector */ } while (!kref_get_unless_zero(&iter->conn->base.refcount)); - spin_unlock_irqrestore(&config->connector_list_lock, flags); if (old_conn) - drm_connector_put(old_conn); + __drm_connector_put_safe(old_conn); + spin_unlock_irqrestore(&config->connector_list_lock, flags); return iter->conn; } @@ -581,9 +619,15 @@ EXPORT_SYMBOL(drm_connector_list_iter_next); */ void drm_connector_list_iter_end(struct drm_connector_list_iter *iter) { + struct drm_mode_config *config = &iter->dev->mode_config; + unsigned long flags; + iter->dev = NULL; - if (iter->conn) - drm_connector_put(iter->conn); + if (iter->conn) { + spin_lock_irqsave(&config->connector_list_lock, flags); + __drm_connector_put_safe(iter->conn); + spin_unlock_irqrestore(&config->connector_list_lock, flags); + } lock_release(&connector_list_iter_dep_map, 0, _RET_IP_); } EXPORT_SYMBOL(drm_connector_list_iter_end); @@ -1229,6 +1273,19 @@ int drm_mode_connector_update_edid_property(struct drm_connector *connector, if (edid) size = EDID_LENGTH * (1 + edid->extensions); + /* Set the display info, using edid if available, otherwise + * reseting the values to defaults. This duplicates the work + * done in drm_add_edid_modes, but that function is not + * consistently called before this one in all drivers and the + * computation is cheap enough that it seems better to + * duplicate it rather than attempt to ensure some arbitrary + * ordering of calls. + */ + if (edid) + drm_add_display_info(connector, edid); + else + drm_reset_display_info(connector); + drm_object_property_set_value(&connector->base, dev->mode_config.non_desktop_property, connector->display_info.non_desktop); diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h index 9ebb8841778cc99095a2235ab8b1d89f654816f2..af00f42ba269b0da3111ac5be480688815875da8 100644 --- a/drivers/gpu/drm/drm_crtc_internal.h +++ b/drivers/gpu/drm/drm_crtc_internal.h @@ -142,6 +142,7 @@ int drm_mode_connector_set_obj_prop(struct drm_mode_object *obj, uint64_t value); int drm_connector_create_standard_properties(struct drm_device *dev); const char *drm_get_connector_force_name(enum drm_connector_force force); +void drm_connector_free_work_fn(struct work_struct *work); /* IOCTL */ int drm_mode_connector_property_set_ioctl(struct drm_device *dev, diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 524eace3d460fe66841884c7e434f8d5b0406d85..ddd5379145758014f1a5a9915abb1528edc1cf17 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1731,7 +1731,7 @@ EXPORT_SYMBOL(drm_edid_duplicate); * * Returns true if @vendor is in @edid, false otherwise */ -static bool edid_vendor(struct edid *edid, const char *vendor) +static bool edid_vendor(const struct edid *edid, const char *vendor) { char edid_vendor[3]; @@ -1749,7 +1749,7 @@ static bool edid_vendor(struct edid *edid, const char *vendor) * * This tells subsequent routines what fixes they need to apply. */ -static u32 edid_get_quirks(struct edid *edid) +static u32 edid_get_quirks(const struct edid *edid) { const struct edid_quirk *quirk; int i; @@ -2813,7 +2813,7 @@ add_detailed_modes(struct drm_connector *connector, struct edid *edid, /* * Search EDID for CEA extension block. */ -static u8 *drm_find_edid_extension(struct edid *edid, int ext_id) +static u8 *drm_find_edid_extension(const struct edid *edid, int ext_id) { u8 *edid_ext = NULL; int i; @@ -2835,12 +2835,12 @@ static u8 *drm_find_edid_extension(struct edid *edid, int ext_id) return edid_ext; } -static u8 *drm_find_cea_extension(struct edid *edid) +static u8 *drm_find_cea_extension(const struct edid *edid) { return drm_find_edid_extension(edid, CEA_EXT); } -static u8 *drm_find_displayid_extension(struct edid *edid) +static u8 *drm_find_displayid_extension(const struct edid *edid) { return drm_find_edid_extension(edid, DISPLAYID_EXT); } @@ -4378,7 +4378,7 @@ drm_parse_hdmi_vsdb_video(struct drm_connector *connector, const u8 *db) } static void drm_parse_cea_ext(struct drm_connector *connector, - struct edid *edid) + const struct edid *edid) { struct drm_display_info *info = &connector->display_info; const u8 *edid_ext; @@ -4412,11 +4412,34 @@ static void drm_parse_cea_ext(struct drm_connector *connector, } } -static void drm_add_display_info(struct drm_connector *connector, - struct edid *edid, u32 quirks) +/* A connector has no EDID information, so we've got no EDID to compute quirks from. Reset + * all of the values which would have been set from EDID + */ +void +drm_reset_display_info(struct drm_connector *connector) { struct drm_display_info *info = &connector->display_info; + info->width_mm = 0; + info->height_mm = 0; + + info->bpc = 0; + info->color_formats = 0; + info->cea_rev = 0; + info->max_tmds_clock = 0; + info->dvi_dual = false; + info->has_hdmi_infoframe = false; + + info->non_desktop = 0; +} +EXPORT_SYMBOL_GPL(drm_reset_display_info); + +u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid) +{ + struct drm_display_info *info = &connector->display_info; + + u32 quirks = edid_get_quirks(edid); + info->width_mm = edid->width_cm * 10; info->height_mm = edid->height_cm * 10; @@ -4430,11 +4453,13 @@ static void drm_add_display_info(struct drm_connector *connector, info->non_desktop = !!(quirks & EDID_QUIRK_NON_DESKTOP); + DRM_DEBUG_KMS("non_desktop set to %d\n", info->non_desktop); + if (edid->revision < 3) - return; + return quirks; if (!(edid->input & DRM_EDID_INPUT_DIGITAL)) - return; + return quirks; drm_parse_cea_ext(connector, edid); @@ -4454,7 +4479,7 @@ static void drm_add_display_info(struct drm_connector *connector, /* Only defined for 1.4 with digital displays */ if (edid->revision < 4) - return; + return quirks; switch (edid->input & DRM_EDID_DIGITAL_DEPTH_MASK) { case DRM_EDID_DIGITAL_DEPTH_6: @@ -4489,7 +4514,9 @@ static void drm_add_display_info(struct drm_connector *connector, info->color_formats |= DRM_COLOR_FORMAT_YCRCB444; if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB422) info->color_formats |= DRM_COLOR_FORMAT_YCRCB422; + return quirks; } +EXPORT_SYMBOL_GPL(drm_add_display_info); static int validate_displayid(u8 *displayid, int length, int idx) { @@ -4645,8 +4672,6 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid) return 0; } - quirks = edid_get_quirks(edid); - drm_edid_to_eld(connector, edid); /* @@ -4654,7 +4679,7 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid) * To avoid multiple parsing of same block, lets parse that map * from sink info, before parsing CEA modes. */ - drm_add_display_info(connector, edid, quirks); + quirks = drm_add_display_info(connector, edid); /* * EDID spec says modes should be preferred in this order: @@ -4846,6 +4871,11 @@ EXPORT_SYMBOL(drm_hdmi_avi_infoframe_from_display_mode); * @mode: DRM display mode * @rgb_quant_range: RGB quantization range (Q) * @rgb_quant_range_selectable: Sink support selectable RGB quantization range (QS) + * @is_hdmi2_sink: HDMI 2.0 sink, which has different default recommendations + * + * Note that @is_hdmi2_sink can be derived by looking at the + * &drm_scdc.supported flag stored in &drm_hdmi_info.scdc, + * &drm_display_info.hdmi, which can be found in &drm_connector.display_info. */ void drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame, diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c index 35b56dfba92949dca9c27ffb002ce9317c1b1f5a..186d00adfb5f86675bfd27a8dd9d812e1475d9fe 100644 --- a/drivers/gpu/drm/drm_fb_cma_helper.c +++ b/drivers/gpu/drm/drm_fb_cma_helper.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #define DEFAULT_FBDEFIO_DELAY_MS 50 @@ -42,7 +43,7 @@ struct drm_fbdev_cma { * callback function to create a cma backed framebuffer. * * An fbdev framebuffer backed by cma is also available by calling - * drm_fbdev_cma_init(). drm_fbdev_cma_fini() tears it down. + * drm_fb_cma_fbdev_init(). drm_fb_cma_fbdev_fini() tears it down. * If the &drm_framebuffer_funcs.dirty callback is set, fb_deferred_io will be * set up automatically. &drm_framebuffer_funcs.dirty is called by * drm_fb_helper_deferred_io() in process context (&struct delayed_work). @@ -68,7 +69,7 @@ struct drm_fbdev_cma { * * Initialize:: * - * fbdev = drm_fbdev_cma_init_with_funcs(dev, 16, + * fbdev = drm_fb_cma_fbdev_init_with_funcs(dev, 16, * dev->mode_config.num_crtc, * dev->mode_config.num_connector, * &driver_fb_funcs); @@ -256,7 +257,7 @@ drm_fbdev_cma_create(struct drm_fb_helper *helper, fbi->screen_size = size; fbi->fix.smem_len = size; - if (fbdev_cma->fb_funcs->dirty) { + if (fb->funcs->dirty) { ret = drm_fbdev_cma_defio_init(fbi, obj); if (ret) goto err_cma_destroy; @@ -277,6 +278,118 @@ static const struct drm_fb_helper_funcs drm_fb_cma_helper_funcs = { .fb_probe = drm_fbdev_cma_create, }; +/** + * drm_fb_cma_fbdev_init_with_funcs() - Allocate and initialize fbdev emulation + * @dev: DRM device + * @preferred_bpp: Preferred bits per pixel for the device. + * @dev->mode_config.preferred_depth is used if this is zero. + * @max_conn_count: Maximum number of connectors. + * @dev->mode_config.num_connector is used if this is zero. + * @funcs: Framebuffer functions, in particular a custom dirty() callback. + * Can be NULL. + * + * Returns: + * Zero on success or negative error code on failure. + */ +int drm_fb_cma_fbdev_init_with_funcs(struct drm_device *dev, + unsigned int preferred_bpp, unsigned int max_conn_count, + const struct drm_framebuffer_funcs *funcs) +{ + struct drm_fbdev_cma *fbdev_cma; + struct drm_fb_helper *fb_helper; + int ret; + + if (!preferred_bpp) + preferred_bpp = dev->mode_config.preferred_depth; + if (!preferred_bpp) + preferred_bpp = 32; + + if (!max_conn_count) + max_conn_count = dev->mode_config.num_connector; + + fbdev_cma = kzalloc(sizeof(*fbdev_cma), GFP_KERNEL); + if (!fbdev_cma) + return -ENOMEM; + + fbdev_cma->fb_funcs = funcs; + fb_helper = &fbdev_cma->fb_helper; + + drm_fb_helper_prepare(dev, fb_helper, &drm_fb_cma_helper_funcs); + + ret = drm_fb_helper_init(dev, fb_helper, max_conn_count); + if (ret < 0) { + DRM_DEV_ERROR(dev->dev, "Failed to initialize fbdev helper.\n"); + goto err_free; + } + + ret = drm_fb_helper_single_add_all_connectors(fb_helper); + if (ret < 0) { + DRM_DEV_ERROR(dev->dev, "Failed to add connectors.\n"); + goto err_drm_fb_helper_fini; + } + + ret = drm_fb_helper_initial_config(fb_helper, preferred_bpp); + if (ret < 0) { + DRM_DEV_ERROR(dev->dev, "Failed to set fbdev configuration.\n"); + goto err_drm_fb_helper_fini; + } + + return 0; + +err_drm_fb_helper_fini: + drm_fb_helper_fini(fb_helper); +err_free: + kfree(fbdev_cma); + + return ret; +} +EXPORT_SYMBOL_GPL(drm_fb_cma_fbdev_init_with_funcs); + +/** + * drm_fb_cma_fbdev_init() - Allocate and initialize fbdev emulation + * @dev: DRM device + * @preferred_bpp: Preferred bits per pixel for the device. + * @dev->mode_config.preferred_depth is used if this is zero. + * @max_conn_count: Maximum number of connectors. + * @dev->mode_config.num_connector is used if this is zero. + * + * Returns: + * Zero on success or negative error code on failure. + */ +int drm_fb_cma_fbdev_init(struct drm_device *dev, unsigned int preferred_bpp, + unsigned int max_conn_count) +{ + return drm_fb_cma_fbdev_init_with_funcs(dev, preferred_bpp, + max_conn_count, NULL); +} +EXPORT_SYMBOL_GPL(drm_fb_cma_fbdev_init); + +/** + * drm_fb_cma_fbdev_fini() - Teardown fbdev emulation + * @dev: DRM device + */ +void drm_fb_cma_fbdev_fini(struct drm_device *dev) +{ + struct drm_fb_helper *fb_helper = dev->fb_helper; + + if (!fb_helper) + return; + + /* Unregister if it hasn't been done already */ + if (fb_helper->fbdev && fb_helper->fbdev->dev) + drm_fb_helper_unregister_fbi(fb_helper); + + if (fb_helper->fbdev) + drm_fbdev_cma_defio_fini(fb_helper->fbdev); + + if (fb_helper->fb) + drm_framebuffer_remove(fb_helper->fb); + + drm_fb_helper_fini(fb_helper); + kfree(to_fbdev_cma(fb_helper)); +} +EXPORT_SYMBOL_GPL(drm_fb_cma_fbdev_fini); + /** * drm_fbdev_cma_init_with_funcs() - Allocate and initializes a drm_fbdev_cma struct * @dev: DRM device diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 04a3a5ce370a059d8fb36f700a7f93a48182e2c6..035784ddd13363eec627425cf7e6a2600b9b7469 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -66,19 +66,23 @@ static DEFINE_MUTEX(kernel_fb_helper_lock); * helper functions used by many drivers to implement the kernel mode setting * interfaces. * - * Initialization is done as a four-step process with drm_fb_helper_prepare(), - * drm_fb_helper_init(), drm_fb_helper_single_add_all_connectors() and - * drm_fb_helper_initial_config(). Drivers with fancier requirements than the - * default behaviour can override the third step with their own code. - * Teardown is done with drm_fb_helper_fini() after the fbdev device is - * unregisters using drm_fb_helper_unregister_fbi(). - * - * At runtime drivers should restore the fbdev console by calling - * drm_fb_helper_restore_fbdev_mode_unlocked() from their &drm_driver.lastclose - * callback. They should also notify the fb helper code from updates to the - * output configuration by calling drm_fb_helper_hotplug_event(). For easier - * integration with the output polling code in drm_crtc_helper.c the modeset - * code provides a &drm_mode_config_funcs.output_poll_changed callback. + * Setup fbdev emulation by calling drm_fb_helper_fbdev_setup() and tear it + * down by calling drm_fb_helper_fbdev_teardown(). + * + * Drivers that need to handle connector hotplugging (e.g. dp mst) can't use + * the setup helper and will need to do the whole four-step setup process with + * drm_fb_helper_prepare(), drm_fb_helper_init(), + * drm_fb_helper_single_add_all_connectors(), enable hotplugging and + * drm_fb_helper_initial_config() to avoid a possible race window. + * + * At runtime drivers should restore the fbdev console by using + * drm_fb_helper_lastclose() as their &drm_driver.lastclose callback. + * They should also notify the fb helper code from updates to the output + * configuration by using drm_fb_helper_output_poll_changed() as their + * &drm_mode_config_funcs.output_poll_changed callback. + * + * For suspend/resume consider using drm_mode_config_helper_suspend() and + * drm_mode_config_helper_resume() which takes care of fbdev as well. * * All other functions exported by the fb helper library can be used to * implement the fbdev driver interface by the driver. @@ -103,7 +107,8 @@ static DEFINE_MUTEX(kernel_fb_helper_lock); * always run in process context since the fb_*() function could be running in * atomic context. If drm_fb_helper_deferred_io() is used as the deferred_io * callback it will also schedule dirty_work with the damage collected from the - * mmap page writes. + * mmap page writes. Drivers can use drm_fb_helper_defio_init() to setup + * deferred I/O (coupled with drm_fb_helper_fbdev_teardown()). */ #define drm_fb_helper_for_each_connector(fbh, i__) \ @@ -1024,6 +1029,49 @@ void drm_fb_helper_deferred_io(struct fb_info *info, } EXPORT_SYMBOL(drm_fb_helper_deferred_io); +/** + * drm_fb_helper_defio_init - fbdev deferred I/O initialization + * @fb_helper: driver-allocated fbdev helper + * + * This function allocates &fb_deferred_io, sets callback to + * drm_fb_helper_deferred_io(), delay to 50ms and calls fb_deferred_io_init(). + * It should be called from the &drm_fb_helper_funcs->fb_probe callback. + * drm_fb_helper_fbdev_teardown() cleans up deferred I/O. + * + * NOTE: A copy of &fb_ops is made and assigned to &info->fbops. This is done + * because fb_deferred_io_cleanup() clears &fbops->fb_mmap and would thereby + * affect other instances of that &fb_ops. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper) +{ + struct fb_info *info = fb_helper->fbdev; + struct fb_deferred_io *fbdefio; + struct fb_ops *fbops; + + fbdefio = kzalloc(sizeof(*fbdefio), GFP_KERNEL); + fbops = kzalloc(sizeof(*fbops), GFP_KERNEL); + if (!fbdefio || !fbops) { + kfree(fbdefio); + kfree(fbops); + return -ENOMEM; + } + + info->fbdefio = fbdefio; + fbdefio->delay = msecs_to_jiffies(50); + fbdefio->deferred_io = drm_fb_helper_deferred_io; + + *fbops = *info->fbops; + info->fbops = fbops; + + fb_deferred_io_init(info); + + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_defio_init); + /** * drm_fb_helper_sys_read - wrapper around fb_sys_read * @info: fb_info struct pointer @@ -1848,6 +1896,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper, if (ret < 0) return ret; + strcpy(fb_helper->fb->comm, "[fbcon]"); return 0; } @@ -2729,6 +2778,120 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) } EXPORT_SYMBOL(drm_fb_helper_hotplug_event); +/** + * drm_fb_helper_fbdev_setup() - Setup fbdev emulation + * @dev: DRM device + * @fb_helper: fbdev helper structure to set up + * @funcs: fbdev helper functions + * @preferred_bpp: Preferred bits per pixel for the device. + * @dev->mode_config.preferred_depth is used if this is zero. + * @max_conn_count: Maximum number of connectors. + * @dev->mode_config.num_connector is used if this is zero. + * + * This function sets up fbdev emulation and registers fbdev for access by + * userspace. If all connectors are disconnected, setup is deferred to the next + * time drm_fb_helper_hotplug_event() is called. + * The caller must to provide a &drm_fb_helper_funcs->fb_probe callback + * function. + * + * See also: drm_fb_helper_initial_config() + * + * Returns: + * Zero on success or negative error code on failure. + */ +int drm_fb_helper_fbdev_setup(struct drm_device *dev, + struct drm_fb_helper *fb_helper, + const struct drm_fb_helper_funcs *funcs, + unsigned int preferred_bpp, + unsigned int max_conn_count) +{ + int ret; + + if (!preferred_bpp) + preferred_bpp = dev->mode_config.preferred_depth; + if (!preferred_bpp) + preferred_bpp = 32; + + if (!max_conn_count) + max_conn_count = dev->mode_config.num_connector; + if (!max_conn_count) { + DRM_DEV_ERROR(dev->dev, "No connectors\n"); + return -EINVAL; + } + + drm_fb_helper_prepare(dev, fb_helper, funcs); + + ret = drm_fb_helper_init(dev, fb_helper, max_conn_count); + if (ret < 0) { + DRM_DEV_ERROR(dev->dev, "Failed to initialize fbdev helper\n"); + return ret; + } + + ret = drm_fb_helper_single_add_all_connectors(fb_helper); + if (ret < 0) { + DRM_DEV_ERROR(dev->dev, "Failed to add connectors\n"); + goto err_drm_fb_helper_fini; + } + + if (!drm_drv_uses_atomic_modeset(dev)) + drm_helper_disable_unused_functions(dev); + + ret = drm_fb_helper_initial_config(fb_helper, preferred_bpp); + if (ret < 0) { + DRM_DEV_ERROR(dev->dev, "Failed to set fbdev configuration\n"); + goto err_drm_fb_helper_fini; + } + + return 0; + +err_drm_fb_helper_fini: + drm_fb_helper_fini(fb_helper); + + return ret; +} +EXPORT_SYMBOL(drm_fb_helper_fbdev_setup); + +/** + * drm_fb_helper_fbdev_teardown - Tear down fbdev emulation + * @dev: DRM device + * + * This function unregisters fbdev if not already done and cleans up the + * associated resources including the &drm_framebuffer. + * The driver is responsible for freeing the &drm_fb_helper structure which is + * stored in &drm_device->fb_helper. Do note that this pointer has been cleared + * when this function returns. + * + * In order to support device removal/unplug while file handles are still open, + * drm_fb_helper_unregister_fbi() should be called on device removal and + * drm_fb_helper_fbdev_teardown() in the &drm_driver->release callback when + * file handles are closed. + */ +void drm_fb_helper_fbdev_teardown(struct drm_device *dev) +{ + struct drm_fb_helper *fb_helper = dev->fb_helper; + struct fb_ops *fbops = NULL; + + if (!fb_helper) + return; + + /* Unregister if it hasn't been done already */ + if (fb_helper->fbdev && fb_helper->fbdev->dev) + drm_fb_helper_unregister_fbi(fb_helper); + + if (fb_helper->fbdev && fb_helper->fbdev->fbdefio) { + fb_deferred_io_cleanup(fb_helper->fbdev); + kfree(fb_helper->fbdev->fbdefio); + fbops = fb_helper->fbdev->fbops; + } + + drm_fb_helper_fini(fb_helper); + kfree(fbops); + + if (fb_helper->fb) + drm_framebuffer_remove(fb_helper->fb); +} +EXPORT_SYMBOL(drm_fb_helper_fbdev_teardown); + /** * drm_fb_helper_lastclose - DRM driver lastclose helper for fbdev emulation * @dev: DRM device diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index d63d4c2ac4c8126e8d38cdcc32638893cb1fcfb2..5a13ff29f4f04af99c61fa07261a35a44ac0dd8e 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -664,6 +664,7 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, INIT_LIST_HEAD(&fb->filp_head); fb->funcs = funcs; + strcpy(fb->comm, current->comm); ret = __drm_mode_object_add(dev, &fb->base, DRM_MODE_OBJECT_FB, false, drm_framebuffer_free); @@ -978,6 +979,7 @@ void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent, struct drm_format_name_buf format_name; unsigned int i; + drm_printf_indent(p, indent, "allocated by = %s\n", fb->comm); drm_printf_indent(p, indent, "refcount=%u\n", drm_framebuffer_read_refcount(fb)); drm_printf_indent(p, indent, "format=%s\n", diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index aa8cb9bfa49900387dd448522a34f1cef2c053b5..4d682a6e8bcbc6d780c6545dd282f9b69d103069 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -272,7 +272,8 @@ EXPORT_SYMBOL_GPL(drm_gem_fb_prepare_fb); * @sizes: fbdev size description * @pitch_align: Optional pitch alignment * @obj: GEM object backing the framebuffer - * @funcs: vtable to be used for the new framebuffer object + * @funcs: Optional vtable to be used for the new framebuffer object when the + * dirty callback is needed. * * This function creates a framebuffer from a &drm_fb_helper_surface_size * description for use in the &drm_fb_helper_funcs.fb_probe callback. @@ -300,6 +301,9 @@ drm_gem_fbdev_fb_create(struct drm_device *dev, if (obj->size < mode_cmd.pitches[0] * mode_cmd.height) return ERR_PTR(-EINVAL); + if (!funcs) + funcs = &drm_gem_fb_funcs; + return drm_gem_fb_alloc(dev, &mode_cmd, &obj, 1, funcs); } EXPORT_SYMBOL(drm_gem_fbdev_fb_create); diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index d1eb56a1eff4078d0d29b33ac2f17b06c4ae0d76..1402c0e71b03d18866139056b12f0d5fd84b6afb 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr mutex_lock(&dev->mode_config.idr_mutex); - /* Insert the new lessee into the tree */ - id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL); - if (id < 0) { - error = id; - goto out_lessee; - } - - lessee->lessee_id = id; - lessee->lessor = drm_master_get(lessor); - list_add_tail(&lessee->lessee_list, &lessor->lessees); - idr_for_each_entry(leases, entry, object) { error = 0; if (!idr_find(&dev->mode_config.crtc_idr, object)) @@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr } } + /* Insert the new lessee into the tree */ + id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL); + if (id < 0) { + error = id; + goto out_lessee; + } + + lessee->lessee_id = id; + lessee->lessor = drm_master_get(lessor); + list_add_tail(&lessee->lessee_list, &lessor->lessees); + /* Move the leases over */ lessee->leases = *leases; DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor); @@ -254,10 +254,10 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr return lessee; out_lessee: - drm_master_put(&lessee); - mutex_unlock(&dev->mode_config.idr_mutex); + drm_master_put(&lessee); + return ERR_PTR(error); } diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index eb86bc3f753b36dd85e2244ba97a77a8e525768b..186c4e90cc1cd25e06a19a89da1bf88a7d9e0cf7 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -575,21 +575,23 @@ EXPORT_SYMBOL(drm_mm_remove_node); */ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new) { + struct drm_mm *mm = old->mm; + DRM_MM_BUG_ON(!old->allocated); *new = *old; list_replace(&old->node_list, &new->node_list); - rb_replace_node(&old->rb, &new->rb, &old->mm->interval_tree.rb_root); + rb_replace_node_cached(&old->rb, &new->rb, &mm->interval_tree); if (drm_mm_hole_follows(old)) { list_replace(&old->hole_stack, &new->hole_stack); rb_replace_node(&old->rb_hole_size, &new->rb_hole_size, - &old->mm->holes_size); + &mm->holes_size); rb_replace_node(&old->rb_hole_addr, &new->rb_hole_addr, - &old->mm->holes_addr); + &mm->holes_addr); } old->allocated = false; diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c index cda8bfab6d3b49e3e31516ae5895b878ed4e7581..e5c653357024dc5ea522706df1a03be1f12d094d 100644 --- a/drivers/gpu/drm/drm_mode_config.c +++ b/drivers/gpu/drm/drm_mode_config.c @@ -382,6 +382,9 @@ void drm_mode_config_init(struct drm_device *dev) ida_init(&dev->mode_config.connector_ida); spin_lock_init(&dev->mode_config.connector_list_lock); + init_llist_head(&dev->mode_config.connector_free_list); + INIT_WORK(&dev->mode_config.connector_free_work, drm_connector_free_work_fn); + drm_mode_create_standard_properties(dev); /* Just to be sure */ @@ -431,6 +434,8 @@ void drm_mode_config_cleanup(struct drm_device *dev) drm_connector_put(connector); } drm_connector_list_iter_end(&conn_iter); + /* connector_iter drops references in a work item. */ + flush_work(&dev->mode_config.connector_free_work); if (WARN_ON(!list_empty(&dev->mode_config.connector_list))) { drm_connector_list_iter_begin(dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) @@ -467,6 +472,9 @@ void drm_mode_config_cleanup(struct drm_device *dev) */ WARN_ON(!list_empty(&dev->mode_config.fb_list)); list_for_each_entry_safe(fb, fbt, &dev->mode_config.fb_list, head) { + struct drm_printer p = drm_debug_printer("[leaked fb]"); + drm_printf(&p, "framebuffer[%u]:\n", fb->base.id); + drm_framebuffer_print_info(&p, 1, fb); drm_framebuffer_free(&fb->base.refcount); } diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 901a4e9a87a3c9b08d8750e3a807e47f52d54295..1f2af707ce03e9670fec85a779404f70fcf29585 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -9,6 +9,7 @@ */ #include +#include #include #ifdef CONFIG_DMI @@ -172,3 +173,5 @@ int drm_get_panel_orientation_quirk(int width, int height) EXPORT_SYMBOL(drm_get_panel_orientation_quirk); #endif + +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 37a93cdffb4ad0e7986a634df4d70ccc3fef286e..2c90519576a3e8b63a4c8361f18672db853ebcec 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format) } /* - * setplane_internal - setplane handler for internal callers + * __setplane_internal - setplane handler for internal callers * - * Note that we assume an extra reference has already been taken on fb. If the - * update fails, this reference will be dropped before return; if it succeeds, - * the previous framebuffer (if any) will be unreferenced instead. + * This function will take a reference on the new fb for the plane + * on success. * * src_{x,y,w,h} are provided in 16.16 fixed point format */ @@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane, if (!ret) { plane->crtc = crtc; plane->fb = fb; - fb = NULL; + drm_framebuffer_get(plane->fb); } else { plane->old_fb = NULL; } out: - if (fb) - drm_framebuffer_put(fb); if (plane->old_fb) drm_framebuffer_put(plane->old_fb); plane->old_fb = NULL; @@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data, struct drm_plane *plane; struct drm_crtc *crtc = NULL; struct drm_framebuffer *fb = NULL; + int ret; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; @@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data, } } - /* - * setplane_internal will take care of deref'ing either the old or new - * framebuffer depending on success. - */ - return setplane_internal(plane, crtc, fb, - plane_req->crtc_x, plane_req->crtc_y, - plane_req->crtc_w, plane_req->crtc_h, - plane_req->src_x, plane_req->src_y, - plane_req->src_w, plane_req->src_h); + ret = setplane_internal(plane, crtc, fb, + plane_req->crtc_x, plane_req->crtc_y, + plane_req->crtc_w, plane_req->crtc_h, + plane_req->src_x, plane_req->src_y, + plane_req->src_w, plane_req->src_h); + + if (fb) + drm_framebuffer_put(fb); + + return ret; } static int drm_mode_cursor_universal(struct drm_crtc *crtc, @@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, src_h = fb->height << 16; } - /* - * setplane_internal will take care of deref'ing either the old or new - * framebuffer depending on success. - */ ret = __setplane_internal(crtc->cursor, crtc, fb, - crtc_x, crtc_y, crtc_w, crtc_h, - 0, 0, src_w, src_h, ctx); + crtc_x, crtc_y, crtc_w, crtc_h, + 0, 0, src_w, src_h, ctx); + + if (fb) + drm_framebuffer_put(fb); /* Update successful; save new cursor position, if necessary */ if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) { diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 9b733c510cbfc3030600ff33de96ee54b965b6e3..0b7b0d1ad2d595a2efacde1cdd4472b401a73278 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -29,9 +29,9 @@ /** * DOC: Overview * - * DRM synchronisation objects (syncobj) are a persistent objects, - * that contain an optional fence. The fence can be updated with a new - * fence, or be NULL. + * DRM synchronisation objects (syncobj, see struct &drm_syncobj) are + * persistent objects that contain an optional fence. The fence can be updated + * with a new fence, or be NULL. * * syncobj's can be waited upon, where it will wait for the underlying * fence. @@ -61,7 +61,8 @@ * @file_private: drm file private pointer * @handle: sync object handle to lookup. * - * Returns a reference to the syncobj pointed to by handle or NULL. + * Returns a reference to the syncobj pointed to by handle or NULL. The + * reference must be released by calling drm_syncobj_put(). */ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, u32 handle) @@ -229,6 +230,19 @@ static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj) return 0; } +/** + * drm_syncobj_find_fence - lookup and reference the fence in a sync object + * @file_private: drm file private pointer + * @handle: sync object handle to lookup. + * @fence: out parameter for the fence + * + * This is just a convenience function that combines drm_syncobj_find() and + * drm_syncobj_fence_get(). + * + * Returns 0 on success or a negative error value on failure. On success @fence + * contains a reference to the fence, which must be released by calling + * dma_fence_put(). + */ int drm_syncobj_find_fence(struct drm_file *file_private, u32 handle, struct dma_fence **fence) @@ -269,6 +283,12 @@ EXPORT_SYMBOL(drm_syncobj_free); * @out_syncobj: returned syncobj * @flags: DRM_SYNCOBJ_* flags * @fence: if non-NULL, the syncobj will represent this fence + * + * This is the first function to create a sync object. After creating, drivers + * probably want to make it available to userspace, either through + * drm_syncobj_get_handle() or drm_syncobj_get_fd(). + * + * Returns 0 on success or a negative error value on failure. */ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, struct dma_fence *fence) @@ -302,6 +322,14 @@ EXPORT_SYMBOL(drm_syncobj_create); /** * drm_syncobj_get_handle - get a handle from a syncobj + * @file_private: drm file private pointer + * @syncobj: Sync object to export + * @handle: out parameter with the new handle + * + * Exports a sync object created with drm_syncobj_create() as a handle on + * @file_private to userspace. + * + * Returns 0 on success or a negative error value on failure. */ int drm_syncobj_get_handle(struct drm_file *file_private, struct drm_syncobj *syncobj, u32 *handle) @@ -371,40 +399,35 @@ static const struct file_operations drm_syncobj_file_fops = { .release = drm_syncobj_file_release, }; -static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj) -{ - struct file *file = anon_inode_getfile("syncobj_file", - &drm_syncobj_file_fops, - syncobj, 0); - if (IS_ERR(file)) - return PTR_ERR(file); - - drm_syncobj_get(syncobj); - if (cmpxchg(&syncobj->file, NULL, file)) { - /* lost the race */ - fput(file); - } - - return 0; -} - +/** + * drm_syncobj_get_fd - get a file descriptor from a syncobj + * @syncobj: Sync object to export + * @p_fd: out parameter with the new file descriptor + * + * Exports a sync object created with drm_syncobj_create() as a file descriptor. + * + * Returns 0 on success or a negative error value on failure. + */ int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd) { - int ret; + struct file *file; int fd; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) return fd; - if (!syncobj->file) { - ret = drm_syncobj_alloc_file(syncobj); - if (ret) { - put_unused_fd(fd); - return ret; - } + file = anon_inode_getfile("syncobj_file", + &drm_syncobj_file_fops, + syncobj, 0); + if (IS_ERR(file)) { + put_unused_fd(fd); + return PTR_ERR(file); } - fd_install(fd, syncobj->file); + + drm_syncobj_get(syncobj); + fd_install(fd, file); + *p_fd = fd; return 0; } @@ -424,31 +447,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private, return ret; } -static struct drm_syncobj *drm_syncobj_fdget(int fd) -{ - struct file *file = fget(fd); - - if (!file) - return NULL; - if (file->f_op != &drm_syncobj_file_fops) - goto err; - - return file->private_data; -err: - fput(file); - return NULL; -}; - static int drm_syncobj_fd_to_handle(struct drm_file *file_private, int fd, u32 *handle) { - struct drm_syncobj *syncobj = drm_syncobj_fdget(fd); + struct drm_syncobj *syncobj; + struct file *file; int ret; - if (!syncobj) + file = fget(fd); + if (!file) return -EINVAL; + if (file->f_op != &drm_syncobj_file_fops) { + fput(file); + return -EINVAL; + } + /* take a reference to put in the idr */ + syncobj = file->private_data; drm_syncobj_get(syncobj); idr_preload(GFP_KERNEL); @@ -457,12 +473,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private, spin_unlock(&file_private->syncobj_table_lock); idr_preload_end(); - if (ret < 0) { - fput(syncobj->file); - return ret; - } - *handle = ret; - return 0; + if (ret > 0) { + *handle = ret; + ret = 0; + } else + drm_syncobj_put(syncobj); + + fput(file); + return ret; } static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private, diff --git a/drivers/gpu/drm/etnaviv/Kconfig b/drivers/gpu/drm/etnaviv/Kconfig index a29b8f59eb15d984b7ebdd40d97055d70f845cc5..3f58b4077767d4fd5317fc016323cf91bf1ef827 100644 --- a/drivers/gpu/drm/etnaviv/Kconfig +++ b/drivers/gpu/drm/etnaviv/Kconfig @@ -6,6 +6,7 @@ config DRM_ETNAVIV depends on MMU select SHMEM select SYNC_FILE + select THERMAL if DRM_ETNAVIV_THERMAL select TMPFS select WANT_DEV_COREDUMP select CMA if HAVE_DMA_CONTIGUOUS @@ -13,6 +14,14 @@ config DRM_ETNAVIV help DRM driver for Vivante GPUs. +config DRM_ETNAVIV_THERMAL + bool "enable ETNAVIV thermal throttling" + depends on DRM_ETNAVIV + default y + help + Compile in support for thermal throttling. + Say Y unless you want to risk burning your SoC. + config DRM_ETNAVIV_REGISTER_LOGGING bool "enable ETNAVIV register logging" depends on DRM_ETNAVIV diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c index 9e7098e3207f5d3b7914ec22dc9dc44ebd020d7a..99ad2f073c6e44bef9789cefc42ef6301155e4e1 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c @@ -100,6 +100,8 @@ static void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu, { u32 flush = 0; + lockdep_assert_held(&gpu->lock); + /* * This assumes that if we're switching to 2D, we're switching * away from 3D, and vice versa. Hence, if we're switching to @@ -164,7 +166,9 @@ static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu, u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu) { - struct etnaviv_cmdbuf *buffer = gpu->buffer; + struct etnaviv_cmdbuf *buffer = &gpu->buffer; + + lockdep_assert_held(&gpu->lock); /* initialize buffer */ buffer->user_size = 0; @@ -178,7 +182,9 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu) u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr) { - struct etnaviv_cmdbuf *buffer = gpu->buffer; + struct etnaviv_cmdbuf *buffer = &gpu->buffer; + + lockdep_assert_held(&gpu->lock); buffer->user_size = 0; @@ -211,10 +217,12 @@ u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe void etnaviv_buffer_end(struct etnaviv_gpu *gpu) { - struct etnaviv_cmdbuf *buffer = gpu->buffer; + struct etnaviv_cmdbuf *buffer = &gpu->buffer; unsigned int waitlink_offset = buffer->user_size - 16; u32 link_target, flush = 0; + lockdep_assert_held(&gpu->lock); + if (gpu->exec_state == ETNA_PIPE_2D) flush = VIVS_GL_FLUSH_CACHE_PE2D; else if (gpu->exec_state == ETNA_PIPE_3D) @@ -253,10 +261,12 @@ void etnaviv_buffer_end(struct etnaviv_gpu *gpu) /* Append a 'sync point' to the ring buffer. */ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event) { - struct etnaviv_cmdbuf *buffer = gpu->buffer; + struct etnaviv_cmdbuf *buffer = &gpu->buffer; unsigned int waitlink_offset = buffer->user_size - 16; u32 dwords, target; + lockdep_assert_held(&gpu->lock); + /* * We need at most 3 dwords in the return target: * 1 event + 1 end + 1 wait + 1 link. @@ -287,13 +297,16 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event) } /* Append a command buffer to the ring buffer. */ -void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, - struct etnaviv_cmdbuf *cmdbuf) +void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state, + unsigned int event, struct etnaviv_cmdbuf *cmdbuf) { - struct etnaviv_cmdbuf *buffer = gpu->buffer; + struct etnaviv_cmdbuf *buffer = &gpu->buffer; unsigned int waitlink_offset = buffer->user_size - 16; u32 return_target, return_dwords; u32 link_target, link_dwords; + bool switch_context = gpu->exec_state != exec_state; + + lockdep_assert_held(&gpu->lock); if (drm_debug & DRM_UT_DRIVER) etnaviv_buffer_dump(gpu, buffer, 0, 0x50); @@ -306,7 +319,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, * need to append a mmu flush load state, followed by a new * link to this buffer - a total of four additional words. */ - if (gpu->mmu->need_flush || gpu->switch_context) { + if (gpu->mmu->need_flush || switch_context) { u32 target, extra_dwords; /* link command */ @@ -321,7 +334,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, } /* pipe switch commands */ - if (gpu->switch_context) + if (switch_context) extra_dwords += 4; target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords); @@ -349,10 +362,9 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, gpu->mmu->need_flush = false; } - if (gpu->switch_context) { - etnaviv_cmd_select_pipe(gpu, buffer, cmdbuf->exec_state); - gpu->exec_state = cmdbuf->exec_state; - gpu->switch_context = false; + if (switch_context) { + etnaviv_cmd_select_pipe(gpu, buffer, exec_state); + gpu->exec_state = exec_state; } /* And the link to the submitted buffer */ @@ -421,4 +433,6 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, if (drm_debug & DRM_UT_DRIVER) etnaviv_buffer_dump(gpu, buffer, 0, 0x50); + + gpu->lastctx = cmdbuf->ctx; } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c index 6e3bbcf24160eb297d7269e79828a0bfdac7bdc5..68e6d3772ad84d636166e2992a29368723946d05 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c @@ -78,6 +78,7 @@ static const struct { ST(0x17c0, 8), ST(0x17e0, 8), ST(0x2400, 14 * 16), + ST(0x3824, 1), ST(0x10800, 32 * 16), ST(0x14600, 16), ST(0x14800, 8 * 8), diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c index 66ac79558bbd4e2d8afb6f83f9d9949d7e93c2c5..3746827f45ebd76426968ac2066c06ae428fd192 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c @@ -86,26 +86,11 @@ void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc) kfree(suballoc); } -struct etnaviv_cmdbuf * -etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size, - size_t nr_bos, size_t nr_pmrs) +int etnaviv_cmdbuf_init(struct etnaviv_cmdbuf_suballoc *suballoc, + struct etnaviv_cmdbuf *cmdbuf, u32 size) { - struct etnaviv_cmdbuf *cmdbuf; - struct etnaviv_perfmon_request *pmrs; - size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]), - sizeof(*cmdbuf)); int granule_offs, order, ret; - cmdbuf = kzalloc(sz, GFP_KERNEL); - if (!cmdbuf) - return NULL; - - sz = sizeof(*pmrs) * nr_pmrs; - pmrs = kzalloc(sz, GFP_KERNEL); - if (!pmrs) - goto out_free_cmdbuf; - - cmdbuf->pmrs = pmrs; cmdbuf->suballoc = suballoc; cmdbuf->size = size; @@ -123,7 +108,7 @@ etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size, if (!ret) { dev_err(suballoc->gpu->dev, "Timeout waiting for cmdbuf space\n"); - return NULL; + return -ETIMEDOUT; } goto retry; } @@ -131,11 +116,7 @@ etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size, cmdbuf->suballoc_offset = granule_offs * SUBALLOC_GRANULE; cmdbuf->vaddr = suballoc->vaddr + cmdbuf->suballoc_offset; - return cmdbuf; - -out_free_cmdbuf: - kfree(cmdbuf); - return NULL; + return 0; } void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf) @@ -151,8 +132,6 @@ void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf) suballoc->free_space = 1; mutex_unlock(&suballoc->lock); wake_up_all(&suballoc->free_event); - kfree(cmdbuf->pmrs); - kfree(cmdbuf); } u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h index b6348b9f2a9ddb9959dd51dc310e86f1be4d53e8..ddc3f7ea169cde73ec240582f031bcd2b1f3f13a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h @@ -33,27 +33,15 @@ struct etnaviv_cmdbuf { void *vaddr; u32 size; u32 user_size; - /* fence after which this buffer is to be disposed */ - struct dma_fence *fence; - /* target exec state */ - u32 exec_state; - /* per GPU in-flight list */ - struct list_head node; - /* perfmon requests */ - unsigned int nr_pmrs; - struct etnaviv_perfmon_request *pmrs; - /* BOs attached to this command buffer */ - unsigned int nr_bos; - struct etnaviv_vram_mapping *bo_map[0]; }; struct etnaviv_cmdbuf_suballoc * etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu); void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc); -struct etnaviv_cmdbuf * -etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size, - size_t nr_bos, size_t nr_pmrs); + +int etnaviv_cmdbuf_init(struct etnaviv_cmdbuf_suballoc *suballoc, + struct etnaviv_cmdbuf *cmdbuf, u32 size); void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf); u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 491eddf9b15022177bc3106181aef4cccaf89fa5..6faf4042db2340e6069ee2be3c0caffba4a4d0a9 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -172,7 +172,7 @@ static int etnaviv_mmu_show(struct etnaviv_gpu *gpu, struct seq_file *m) static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu, struct seq_file *m) { - struct etnaviv_cmdbuf *buf = gpu->buffer; + struct etnaviv_cmdbuf *buf = &gpu->buffer; u32 size = buf->size; u32 *ptr = buf->vaddr; u32 i; @@ -459,9 +459,6 @@ static int etnaviv_ioctl_pm_query_dom(struct drm_device *dev, void *data, struct drm_etnaviv_pm_domain *args = data; struct etnaviv_gpu *gpu; - /* reject as long as the feature isn't stable */ - return -EINVAL; - if (args->pipe >= ETNA_MAX_PIPES) return -EINVAL; @@ -479,9 +476,6 @@ static int etnaviv_ioctl_pm_query_sig(struct drm_device *dev, void *data, struct drm_etnaviv_pm_signal *args = data; struct etnaviv_gpu *gpu; - /* reject as long as the feature isn't stable */ - return -EINVAL; - if (args->pipe >= ETNA_MAX_PIPES) return -EINVAL; @@ -556,7 +550,7 @@ static struct drm_driver etnaviv_drm_driver = { .desc = "etnaviv DRM", .date = "20151214", .major = 1, - .minor = 1, + .minor = 2, }; /* @@ -580,12 +574,6 @@ static int etnaviv_bind(struct device *dev) } drm->dev_private = priv; - priv->wq = alloc_ordered_workqueue("etnaviv", 0); - if (!priv->wq) { - ret = -ENOMEM; - goto out_wq; - } - mutex_init(&priv->gem_lock); INIT_LIST_HEAD(&priv->gem_list); priv->num_gpus = 0; @@ -607,9 +595,6 @@ static int etnaviv_bind(struct device *dev) out_register: component_unbind_all(dev, drm); out_bind: - flush_workqueue(priv->wq); - destroy_workqueue(priv->wq); -out_wq: kfree(priv); out_unref: drm_dev_unref(drm); @@ -624,9 +609,6 @@ static void etnaviv_unbind(struct device *dev) drm_dev_unregister(drm); - flush_workqueue(priv->wq); - destroy_workqueue(priv->wq); - component_unbind_all(dev, drm); drm->dev_private = NULL; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h index d249acb6da0825e6e92427b0d3fe1e5f1f975a19..a54f0b758a5c9ca2ded2916e48490a509b2747a4 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h @@ -56,18 +56,8 @@ struct etnaviv_drm_private { /* list of GEM objects: */ struct mutex gem_lock; struct list_head gem_list; - - struct workqueue_struct *wq; }; -static inline void etnaviv_queue_work(struct drm_device *dev, - struct work_struct *w) -{ - struct etnaviv_drm_private *priv = dev->dev_private; - - queue_work(priv->wq, w); -} - int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file); @@ -97,8 +87,8 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu); u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr); void etnaviv_buffer_end(struct etnaviv_gpu *gpu); void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event); -void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, - struct etnaviv_cmdbuf *cmdbuf); +void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state, + unsigned int event, struct etnaviv_cmdbuf *cmdbuf); void etnaviv_validate_init(void); bool etnaviv_cmd_validate_one(struct etnaviv_gpu *gpu, u32 *stream, unsigned int size, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index 2d955d7d7b6d83ecdfae58e5bf26084f28a98999..6d0909c589d10995873c6a567dc275caf7556901 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -120,7 +120,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) struct core_dump_iterator iter; struct etnaviv_vram_mapping *vram; struct etnaviv_gem_object *obj; - struct etnaviv_cmdbuf *cmd; + struct etnaviv_gem_submit *submit; unsigned int n_obj, n_bomap_pages; size_t file_size, mmu_size; __le64 *bomap, *bomap_start; @@ -132,11 +132,11 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) n_bomap_pages = 0; file_size = ARRAY_SIZE(etnaviv_dump_registers) * sizeof(struct etnaviv_dump_registers) + - mmu_size + gpu->buffer->size; + mmu_size + gpu->buffer.size; /* Add in the active command buffers */ - list_for_each_entry(cmd, &gpu->active_cmd_list, node) { - file_size += cmd->size; + list_for_each_entry(submit, &gpu->active_submit_list, node) { + file_size += submit->cmdbuf.size; n_obj++; } @@ -176,13 +176,14 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) etnaviv_core_dump_registers(&iter, gpu); etnaviv_core_dump_mmu(&iter, gpu, mmu_size); - etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer->vaddr, - gpu->buffer->size, - etnaviv_cmdbuf_get_va(gpu->buffer)); - - list_for_each_entry(cmd, &gpu->active_cmd_list, node) - etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD, cmd->vaddr, - cmd->size, etnaviv_cmdbuf_get_va(cmd)); + etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer.vaddr, + gpu->buffer.size, + etnaviv_cmdbuf_get_va(&gpu->buffer)); + + list_for_each_entry(submit, &gpu->active_submit_list, node) + etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD, + submit->cmdbuf.vaddr, submit->cmdbuf.size, + etnaviv_cmdbuf_get_va(&submit->cmdbuf)); /* Reserve space for the bomap */ if (n_bomap_pages) { diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index daee3f1196df831b131ad1c229c25fba2d8364b5..fcc969fa0e6927bd189ca66fb8f3964c40cd5609 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -24,6 +24,9 @@ #include "etnaviv_gpu.h" #include "etnaviv_mmu.h" +static struct lock_class_key etnaviv_shm_lock_class; +static struct lock_class_key etnaviv_userptr_lock_class; + static void etnaviv_gem_scatter_map(struct etnaviv_gem_object *etnaviv_obj) { struct drm_device *dev = etnaviv_obj->base.dev; @@ -583,7 +586,7 @@ void etnaviv_gem_free_object(struct drm_gem_object *obj) kfree(etnaviv_obj); } -int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj) +void etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj) { struct etnaviv_drm_private *priv = dev->dev_private; struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); @@ -591,8 +594,6 @@ int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj) mutex_lock(&priv->gem_lock); list_add_tail(&etnaviv_obj->gem_node, &priv->gem_list); mutex_unlock(&priv->gem_lock); - - return 0; } static int etnaviv_gem_new_impl(struct drm_device *dev, u32 size, u32 flags, @@ -640,8 +641,9 @@ static int etnaviv_gem_new_impl(struct drm_device *dev, u32 size, u32 flags, return 0; } -static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev, - u32 size, u32 flags) +/* convenience method to construct a GEM buffer object, and userspace handle */ +int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file, + u32 size, u32 flags, u32 *handle) { struct drm_gem_object *obj = NULL; int ret; @@ -653,6 +655,8 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev, if (ret) goto fail; + lockdep_set_class(&to_etnaviv_bo(obj)->lock, &etnaviv_shm_lock_class); + ret = drm_gem_object_init(dev, obj, size); if (ret == 0) { struct address_space *mapping; @@ -660,7 +664,7 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev, /* * Our buffers are kept pinned, so allocating them * from the MOVABLE zone is a really bad idea, and - * conflicts with CMA. See coments above new_inode() + * conflicts with CMA. See comments above new_inode() * why this is required _and_ expected if you're * going to pin these pages. */ @@ -672,33 +676,12 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev, if (ret) goto fail; - return obj; - -fail: - drm_gem_object_put_unlocked(obj); - return ERR_PTR(ret); -} - -/* convenience method to construct a GEM buffer object, and userspace handle */ -int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file, - u32 size, u32 flags, u32 *handle) -{ - struct drm_gem_object *obj; - int ret; - - obj = __etnaviv_gem_new(dev, size, flags); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - ret = etnaviv_gem_obj_add(dev, obj); - if (ret < 0) { - drm_gem_object_put_unlocked(obj); - return ret; - } + etnaviv_gem_obj_add(dev, obj); ret = drm_gem_handle_create(file, obj, handle); /* drop reference from allocate - handle holds it now */ +fail: drm_gem_object_put_unlocked(obj); return ret; @@ -722,139 +705,41 @@ int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags, return 0; } -struct get_pages_work { - struct work_struct work; - struct mm_struct *mm; - struct task_struct *task; - struct etnaviv_gem_object *etnaviv_obj; -}; - -static struct page **etnaviv_gem_userptr_do_get_pages( - struct etnaviv_gem_object *etnaviv_obj, struct mm_struct *mm, struct task_struct *task) -{ - int ret = 0, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT; - struct page **pvec; - uintptr_t ptr; - unsigned int flags = 0; - - pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); - if (!pvec) - return ERR_PTR(-ENOMEM); - - if (!etnaviv_obj->userptr.ro) - flags |= FOLL_WRITE; - - pinned = 0; - ptr = etnaviv_obj->userptr.ptr; - - down_read(&mm->mmap_sem); - while (pinned < npages) { - ret = get_user_pages_remote(task, mm, ptr, npages - pinned, - flags, pvec + pinned, NULL, NULL); - if (ret < 0) - break; - - ptr += ret * PAGE_SIZE; - pinned += ret; - } - up_read(&mm->mmap_sem); - - if (ret < 0) { - release_pages(pvec, pinned); - kvfree(pvec); - return ERR_PTR(ret); - } - - return pvec; -} - -static void __etnaviv_gem_userptr_get_pages(struct work_struct *_work) -{ - struct get_pages_work *work = container_of(_work, typeof(*work), work); - struct etnaviv_gem_object *etnaviv_obj = work->etnaviv_obj; - struct page **pvec; - - pvec = etnaviv_gem_userptr_do_get_pages(etnaviv_obj, work->mm, work->task); - - mutex_lock(&etnaviv_obj->lock); - if (IS_ERR(pvec)) { - etnaviv_obj->userptr.work = ERR_CAST(pvec); - } else { - etnaviv_obj->userptr.work = NULL; - etnaviv_obj->pages = pvec; - } - - mutex_unlock(&etnaviv_obj->lock); - drm_gem_object_put_unlocked(&etnaviv_obj->base); - - mmput(work->mm); - put_task_struct(work->task); - kfree(work); -} - static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) { struct page **pvec = NULL; - struct get_pages_work *work; - struct mm_struct *mm; - int ret, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT; - - if (etnaviv_obj->userptr.work) { - if (IS_ERR(etnaviv_obj->userptr.work)) { - ret = PTR_ERR(etnaviv_obj->userptr.work); - etnaviv_obj->userptr.work = NULL; - } else { - ret = -EAGAIN; - } - return ret; - } + struct etnaviv_gem_userptr *userptr = &etnaviv_obj->userptr; + int ret, pinned = 0, npages = etnaviv_obj->base.size >> PAGE_SHIFT; - mm = get_task_mm(etnaviv_obj->userptr.task); - pinned = 0; - if (mm == current->mm) { - pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); - if (!pvec) { - mmput(mm); - return -ENOMEM; - } - - pinned = __get_user_pages_fast(etnaviv_obj->userptr.ptr, npages, - !etnaviv_obj->userptr.ro, pvec); - if (pinned < 0) { - kvfree(pvec); - mmput(mm); - return pinned; - } - - if (pinned == npages) { - etnaviv_obj->pages = pvec; - mmput(mm); - return 0; - } - } + might_lock_read(¤t->mm->mmap_sem); - release_pages(pvec, pinned); - kvfree(pvec); + if (userptr->mm != current->mm) + return -EPERM; - work = kmalloc(sizeof(*work), GFP_KERNEL); - if (!work) { - mmput(mm); + pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); + if (!pvec) return -ENOMEM; - } - get_task_struct(current); - drm_gem_object_get(&etnaviv_obj->base); + do { + unsigned num_pages = npages - pinned; + uint64_t ptr = userptr->ptr + pinned * PAGE_SIZE; + struct page **pages = pvec + pinned; - work->mm = mm; - work->task = current; - work->etnaviv_obj = etnaviv_obj; + ret = get_user_pages_fast(ptr, num_pages, + !userptr->ro ? FOLL_WRITE : 0, pages); + if (ret < 0) { + release_pages(pvec, pinned); + kvfree(pvec); + return ret; + } + + pinned += ret; - etnaviv_obj->userptr.work = &work->work; - INIT_WORK(&work->work, __etnaviv_gem_userptr_get_pages); + } while (pinned < npages); - etnaviv_queue_work(etnaviv_obj->base.dev, &work->work); + etnaviv_obj->pages = pvec; - return -EAGAIN; + return 0; } static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj) @@ -870,7 +755,6 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj) release_pages(etnaviv_obj->pages, npages); kvfree(etnaviv_obj->pages); } - put_task_struct(etnaviv_obj->userptr.task); } static int etnaviv_gem_userptr_mmap_obj(struct etnaviv_gem_object *etnaviv_obj, @@ -897,17 +781,16 @@ int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file, if (ret) return ret; + lockdep_set_class(&etnaviv_obj->lock, &etnaviv_userptr_lock_class); + etnaviv_obj->userptr.ptr = ptr; - etnaviv_obj->userptr.task = current; + etnaviv_obj->userptr.mm = current->mm; etnaviv_obj->userptr.ro = !(flags & ETNA_USERPTR_WRITE); - get_task_struct(current); - ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base); - if (ret) - goto unreference; + etnaviv_gem_obj_add(dev, &etnaviv_obj->base); ret = drm_gem_handle_create(file, &etnaviv_obj->base, handle); -unreference: + /* drop reference from allocate - handle holds it now */ drm_gem_object_put_unlocked(&etnaviv_obj->base); return ret; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h index e437fba1209d925cca7bf7f33b5651c3eeeda21a..be72a9833f2b4723c28da411896bb9eaa4ffe334 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h @@ -18,6 +18,7 @@ #define __ETNAVIV_GEM_H__ #include +#include "etnaviv_cmdbuf.h" #include "etnaviv_drv.h" struct dma_fence; @@ -26,8 +27,7 @@ struct etnaviv_gem_object; struct etnaviv_gem_userptr { uintptr_t ptr; - struct task_struct *task; - struct work_struct *work; + struct mm_struct *mm; bool ro; }; @@ -98,26 +98,32 @@ struct etnaviv_gem_submit_bo { /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, * associated with the cmdstream submission for synchronization (and - * make it easier to unwind when things go wrong, etc). This only - * lasts for the duration of the submit-ioctl. + * make it easier to unwind when things go wrong, etc). */ struct etnaviv_gem_submit { - struct drm_device *dev; + struct kref refcount; struct etnaviv_gpu *gpu; - struct ww_acquire_ctx ticket; - struct dma_fence *fence; + struct dma_fence *out_fence, *in_fence; + struct list_head node; /* GPU active submit list */ + struct etnaviv_cmdbuf cmdbuf; + bool runtime_resumed; + u32 exec_state; u32 flags; + unsigned int nr_pmrs; + struct etnaviv_perfmon_request *pmrs; unsigned int nr_bos; struct etnaviv_gem_submit_bo bos[0]; /* No new members here, the previous one is variable-length! */ }; +void etnaviv_submit_put(struct etnaviv_gem_submit * submit); + int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj, struct timespec *timeout); int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags, struct reservation_object *robj, const struct etnaviv_gem_ops *ops, struct etnaviv_gem_object **res); -int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj); +void etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj); struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *obj); void etnaviv_gem_put_pages(struct etnaviv_gem_object *obj); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index ae884723e9b1b7a59c3f38a4518d6a804be6b074..5704305d41e69f57466ded0628f37be669304fbd 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -19,6 +19,7 @@ #include "etnaviv_drv.h" #include "etnaviv_gem.h" +static struct lock_class_key etnaviv_prime_lock_class; struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj) { @@ -125,6 +126,8 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev, if (ret < 0) return ERR_PTR(ret); + lockdep_set_class(&etnaviv_obj->lock, &etnaviv_prime_lock_class); + npages = size / PAGE_SIZE; etnaviv_obj->sgt = sgt; @@ -139,9 +142,7 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev, if (ret) goto fail; - ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base); - if (ret) - goto fail; + etnaviv_gem_obj_add(dev, &etnaviv_obj->base); return &etnaviv_obj->base; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index ff911541a190a4819e0809ea60159c0e08d3f6e0..1f8202bca061542a2f0d994d6f2554540809ac90 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -33,22 +33,25 @@ #define BO_PINNED 0x2000 static struct etnaviv_gem_submit *submit_create(struct drm_device *dev, - struct etnaviv_gpu *gpu, size_t nr) + struct etnaviv_gpu *gpu, size_t nr_bos, size_t nr_pmrs) { struct etnaviv_gem_submit *submit; - size_t sz = size_vstruct(nr, sizeof(submit->bos[0]), sizeof(*submit)); + size_t sz = size_vstruct(nr_bos, sizeof(submit->bos[0]), sizeof(*submit)); - submit = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); - if (submit) { - submit->dev = dev; - submit->gpu = gpu; + submit = kzalloc(sz, GFP_KERNEL); + if (!submit) + return NULL; - /* initially, until copy_from_user() and bo lookup succeeds: */ - submit->nr_bos = 0; - submit->fence = NULL; - - ww_acquire_init(&submit->ticket, &reservation_ww_class); + submit->pmrs = kcalloc(nr_pmrs, sizeof(struct etnaviv_perfmon_request), + GFP_KERNEL); + if (!submit->pmrs) { + kfree(submit); + return NULL; } + submit->nr_pmrs = nr_pmrs; + + submit->gpu = gpu; + kref_init(&submit->refcount); return submit; } @@ -111,7 +114,8 @@ static void submit_unlock_object(struct etnaviv_gem_submit *submit, int i) } } -static int submit_lock_objects(struct etnaviv_gem_submit *submit) +static int submit_lock_objects(struct etnaviv_gem_submit *submit, + struct ww_acquire_ctx *ticket) { int contended, slow_locked = -1, i, ret = 0; @@ -126,7 +130,7 @@ static int submit_lock_objects(struct etnaviv_gem_submit *submit) if (!(submit->bos[i].flags & BO_LOCKED)) { ret = ww_mutex_lock_interruptible(&etnaviv_obj->resv->lock, - &submit->ticket); + ticket); if (ret == -EALREADY) DRM_ERROR("BO at index %u already on submit list\n", i); @@ -136,7 +140,7 @@ static int submit_lock_objects(struct etnaviv_gem_submit *submit) } } - ww_acquire_done(&submit->ticket); + ww_acquire_done(ticket); return 0; @@ -154,7 +158,7 @@ static int submit_lock_objects(struct etnaviv_gem_submit *submit) /* we lost out in a seqno race, lock and retry.. */ ret = ww_mutex_lock_slow_interruptible(&etnaviv_obj->resv->lock, - &submit->ticket); + ticket); if (!ret) { submit->bos[contended].flags |= BO_LOCKED; slow_locked = contended; @@ -181,19 +185,33 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit) break; } + if (submit->flags & ETNA_SUBMIT_FENCE_FD_IN) { + /* + * Wait if the fence is from a foreign context, or if the fence + * array contains any fence from a foreign context. + */ + if (!dma_fence_match_context(submit->in_fence, context)) + ret = dma_fence_wait(submit->in_fence, true); + } + return ret; } -static void submit_unpin_objects(struct etnaviv_gem_submit *submit) +static void submit_attach_object_fences(struct etnaviv_gem_submit *submit) { int i; for (i = 0; i < submit->nr_bos; i++) { - if (submit->bos[i].flags & BO_PINNED) - etnaviv_gem_mapping_unreference(submit->bos[i].mapping); + struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + + if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE) + reservation_object_add_excl_fence(etnaviv_obj->resv, + submit->out_fence); + else + reservation_object_add_shared_fence(etnaviv_obj->resv, + submit->out_fence); - submit->bos[i].mapping = NULL; - submit->bos[i].flags &= ~BO_PINNED; + submit_unlock_object(submit, i); } } @@ -211,6 +229,7 @@ static int submit_pin_objects(struct etnaviv_gem_submit *submit) ret = PTR_ERR(mapping); break; } + atomic_inc(&etnaviv_obj->gpu_active); submit->bos[i].flags |= BO_PINNED; submit->bos[i].mapping = mapping; @@ -285,13 +304,11 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream, } static int submit_perfmon_validate(struct etnaviv_gem_submit *submit, - struct etnaviv_cmdbuf *cmdbuf, - const struct drm_etnaviv_gem_submit_pmr *pmrs, - u32 nr_pms) + u32 exec_state, const struct drm_etnaviv_gem_submit_pmr *pmrs) { u32 i; - for (i = 0; i < nr_pms; i++) { + for (i = 0; i < submit->nr_pmrs; i++) { const struct drm_etnaviv_gem_submit_pmr *r = pmrs + i; struct etnaviv_gem_submit_bo *bo; int ret; @@ -316,39 +333,65 @@ static int submit_perfmon_validate(struct etnaviv_gem_submit *submit, return -EINVAL; } - if (etnaviv_pm_req_validate(r, cmdbuf->exec_state)) { + if (etnaviv_pm_req_validate(r, exec_state)) { DRM_ERROR("perfmon request: domain or signal not valid"); return -EINVAL; } - cmdbuf->pmrs[i].flags = r->flags; - cmdbuf->pmrs[i].domain = r->domain; - cmdbuf->pmrs[i].signal = r->signal; - cmdbuf->pmrs[i].sequence = r->sequence; - cmdbuf->pmrs[i].offset = r->read_offset; - cmdbuf->pmrs[i].bo_vma = etnaviv_gem_vmap(&bo->obj->base); + submit->pmrs[i].flags = r->flags; + submit->pmrs[i].domain = r->domain; + submit->pmrs[i].signal = r->signal; + submit->pmrs[i].sequence = r->sequence; + submit->pmrs[i].offset = r->read_offset; + submit->pmrs[i].bo_vma = etnaviv_gem_vmap(&bo->obj->base); } return 0; } -static void submit_cleanup(struct etnaviv_gem_submit *submit) +static void submit_cleanup(struct kref *kref) { + struct etnaviv_gem_submit *submit = + container_of(kref, struct etnaviv_gem_submit, refcount); unsigned i; + if (submit->runtime_resumed) + pm_runtime_put_autosuspend(submit->gpu->dev); + + if (submit->cmdbuf.suballoc) + etnaviv_cmdbuf_free(&submit->cmdbuf); + for (i = 0; i < submit->nr_bos; i++) { struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + /* unpin all objects */ + if (submit->bos[i].flags & BO_PINNED) { + etnaviv_gem_mapping_unreference(submit->bos[i].mapping); + atomic_dec(&etnaviv_obj->gpu_active); + submit->bos[i].mapping = NULL; + submit->bos[i].flags &= ~BO_PINNED; + } + + /* if the GPU submit failed, objects might still be locked */ submit_unlock_object(submit, i); drm_gem_object_put_unlocked(&etnaviv_obj->base); } - ww_acquire_fini(&submit->ticket); - if (submit->fence) - dma_fence_put(submit->fence); + wake_up_all(&submit->gpu->fence_event); + + if (submit->in_fence) + dma_fence_put(submit->in_fence); + if (submit->out_fence) + dma_fence_put(submit->out_fence); + kfree(submit->pmrs); kfree(submit); } +void etnaviv_submit_put(struct etnaviv_gem_submit *submit) +{ + kref_put(&submit->refcount, submit_cleanup); +} + int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file) { @@ -358,10 +401,9 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_etnaviv_gem_submit_pmr *pmrs; struct drm_etnaviv_gem_submit_bo *bos; struct etnaviv_gem_submit *submit; - struct etnaviv_cmdbuf *cmdbuf; struct etnaviv_gpu *gpu; - struct dma_fence *in_fence = NULL; struct sync_file *sync_file = NULL; + struct ww_acquire_ctx ticket; int out_fence_fd = -1; void *stream; int ret; @@ -399,17 +441,11 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, relocs = kvmalloc_array(args->nr_relocs, sizeof(*relocs), GFP_KERNEL); pmrs = kvmalloc_array(args->nr_pmrs, sizeof(*pmrs), GFP_KERNEL); stream = kvmalloc_array(1, args->stream_size, GFP_KERNEL); - cmdbuf = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, - ALIGN(args->stream_size, 8) + 8, - args->nr_bos, args->nr_pmrs); - if (!bos || !relocs || !pmrs || !stream || !cmdbuf) { + if (!bos || !relocs || !pmrs || !stream) { ret = -ENOMEM; goto err_submit_cmds; } - cmdbuf->exec_state = args->exec_state; - cmdbuf->ctx = file->driver_priv; - ret = copy_from_user(bos, u64_to_user_ptr(args->bos), args->nr_bos * sizeof(*bos)); if (ret) { @@ -430,7 +466,6 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, ret = -EFAULT; goto err_submit_cmds; } - cmdbuf->nr_pmrs = args->nr_pmrs; ret = copy_from_user(stream, u64_to_user_ptr(args->stream), args->stream_size); @@ -447,19 +482,28 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, } } - submit = submit_create(dev, gpu, args->nr_bos); + ww_acquire_init(&ticket, &reservation_ww_class); + + submit = submit_create(dev, gpu, args->nr_bos, args->nr_pmrs); if (!submit) { ret = -ENOMEM; - goto err_submit_cmds; + goto err_submit_ww_acquire; } + ret = etnaviv_cmdbuf_init(gpu->cmdbuf_suballoc, &submit->cmdbuf, + ALIGN(args->stream_size, 8) + 8); + if (ret) + goto err_submit_objects; + + submit->cmdbuf.ctx = file->driver_priv; + submit->exec_state = args->exec_state; submit->flags = args->flags; ret = submit_lookup_objects(submit, file, bos, args->nr_bos); if (ret) goto err_submit_objects; - ret = submit_lock_objects(submit); + ret = submit_lock_objects(submit, &ticket); if (ret) goto err_submit_objects; @@ -470,21 +514,11 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, } if (args->flags & ETNA_SUBMIT_FENCE_FD_IN) { - in_fence = sync_file_get_fence(args->fence_fd); - if (!in_fence) { + submit->in_fence = sync_file_get_fence(args->fence_fd); + if (!submit->in_fence) { ret = -EINVAL; goto err_submit_objects; } - - /* - * Wait if the fence is from a foreign context, or if the fence - * array contains any fence from a foreign context. - */ - if (!dma_fence_match_context(in_fence, gpu->fence_context)) { - ret = dma_fence_wait(in_fence, true); - if (ret) - goto err_submit_objects; - } } ret = submit_fence_sync(submit); @@ -493,25 +527,25 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, ret = submit_pin_objects(submit); if (ret) - goto out; + goto err_submit_objects; ret = submit_reloc(submit, stream, args->stream_size / 4, relocs, args->nr_relocs); if (ret) - goto out; + goto err_submit_objects; - ret = submit_perfmon_validate(submit, cmdbuf, pmrs, args->nr_pmrs); + ret = submit_perfmon_validate(submit, args->exec_state, pmrs); if (ret) - goto out; + goto err_submit_objects; - memcpy(cmdbuf->vaddr, stream, args->stream_size); - cmdbuf->user_size = ALIGN(args->stream_size, 8); + memcpy(submit->cmdbuf.vaddr, stream, args->stream_size); + submit->cmdbuf.user_size = ALIGN(args->stream_size, 8); - ret = etnaviv_gpu_submit(gpu, submit, cmdbuf); + ret = etnaviv_gpu_submit(gpu, submit); if (ret) - goto out; + goto err_submit_objects; - cmdbuf = NULL; + submit_attach_object_fences(submit); if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) { /* @@ -520,39 +554,26 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, * fence to the sync file here, eliminating the ENOMEM * possibility at this stage. */ - sync_file = sync_file_create(submit->fence); + sync_file = sync_file_create(submit->out_fence); if (!sync_file) { ret = -ENOMEM; - goto out; + goto err_submit_objects; } fd_install(out_fence_fd, sync_file->file); } args->fence_fd = out_fence_fd; - args->fence = submit->fence->seqno; - -out: - submit_unpin_objects(submit); - - /* - * If we're returning -EAGAIN, it may be due to the userptr code - * wanting to run its workqueue outside of any locks. Flush our - * workqueue to ensure that it is run in a timely manner. - */ - if (ret == -EAGAIN) - flush_workqueue(priv->wq); + args->fence = submit->out_fence->seqno; err_submit_objects: - if (in_fence) - dma_fence_put(in_fence); - submit_cleanup(submit); + etnaviv_submit_put(submit); + +err_submit_ww_acquire: + ww_acquire_fini(&ticket); err_submit_cmds: if (ret && (out_fence_fd >= 0)) put_unused_fd(out_fence_fd); - /* if we still own the cmdbuf */ - if (cmdbuf) - etnaviv_cmdbuf_free(cmdbuf); if (stream) kvfree(stream); if (bos) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index e19cbe05da2a31ca9456d2476e388167aedee219..21d0d22f11688fbaf7606faa8e7011e6ba82a932 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -644,7 +644,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu) prefetch = etnaviv_buffer_init(gpu); gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U); - etnaviv_gpu_start_fe(gpu, etnaviv_cmdbuf_get_va(gpu->buffer), + etnaviv_gpu_start_fe(gpu, etnaviv_cmdbuf_get_va(&gpu->buffer), prefetch); } @@ -717,15 +717,15 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) } /* Create buffer: */ - gpu->buffer = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, PAGE_SIZE, 0, 0); - if (!gpu->buffer) { - ret = -ENOMEM; + ret = etnaviv_cmdbuf_init(gpu->cmdbuf_suballoc, &gpu->buffer, + PAGE_SIZE); + if (ret) { dev_err(gpu->dev, "could not create command buffer\n"); goto destroy_iommu; } if (gpu->mmu->version == ETNAVIV_IOMMU_V1 && - etnaviv_cmdbuf_get_va(gpu->buffer) > 0x80000000) { + etnaviv_cmdbuf_get_va(&gpu->buffer) > 0x80000000) { ret = -EINVAL; dev_err(gpu->dev, "command buffer outside valid memory window\n"); @@ -751,8 +751,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu) return 0; free_buffer: - etnaviv_cmdbuf_free(gpu->buffer); - gpu->buffer = NULL; + etnaviv_cmdbuf_free(&gpu->buffer); destroy_iommu: etnaviv_iommu_destroy(gpu->mmu); gpu->mmu = NULL; @@ -958,7 +957,7 @@ static void recover_worker(struct work_struct *work) pm_runtime_put_autosuspend(gpu->dev); /* Retire the buffer objects in a work */ - etnaviv_queue_work(gpu->drm, &gpu->retire_work); + queue_work(gpu->wq, &gpu->retire_work); } static void hangcheck_timer_reset(struct etnaviv_gpu *gpu) @@ -994,7 +993,7 @@ static void hangcheck_handler(struct timer_list *t) dev_err(gpu->dev, " completed fence: %u\n", fence); dev_err(gpu->dev, " active fence: %u\n", gpu->active_fence); - etnaviv_queue_work(gpu->drm, &gpu->recover_work); + queue_work(gpu->wq, &gpu->recover_work); } /* if still more pending work, reset the hangcheck timer: */ @@ -1201,42 +1200,23 @@ static void retire_worker(struct work_struct *work) struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu, retire_work); u32 fence = gpu->completed_fence; - struct etnaviv_cmdbuf *cmdbuf, *tmp; - unsigned int i; + struct etnaviv_gem_submit *submit, *tmp; + LIST_HEAD(retire_list); mutex_lock(&gpu->lock); - list_for_each_entry_safe(cmdbuf, tmp, &gpu->active_cmd_list, node) { - if (!dma_fence_is_signaled(cmdbuf->fence)) + list_for_each_entry_safe(submit, tmp, &gpu->active_submit_list, node) { + if (!dma_fence_is_signaled(submit->out_fence)) break; - list_del(&cmdbuf->node); - dma_fence_put(cmdbuf->fence); - - for (i = 0; i < cmdbuf->nr_bos; i++) { - struct etnaviv_vram_mapping *mapping = cmdbuf->bo_map[i]; - struct etnaviv_gem_object *etnaviv_obj = mapping->object; - - atomic_dec(&etnaviv_obj->gpu_active); - /* drop the refcount taken in etnaviv_gpu_submit */ - etnaviv_gem_mapping_unreference(mapping); - } - - etnaviv_cmdbuf_free(cmdbuf); - /* - * We need to balance the runtime PM count caused by - * each submission. Upon submission, we increment - * the runtime PM counter, and allocate one event. - * So here, we put the runtime PM count for each - * completed event. - */ - pm_runtime_put_autosuspend(gpu->dev); + list_move(&submit->node, &retire_list); } gpu->retired_fence = fence; mutex_unlock(&gpu->lock); - wake_up_all(&gpu->fence_event); + list_for_each_entry_safe(submit, tmp, &retire_list, node) + etnaviv_submit_put(submit); } int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu, @@ -1295,41 +1275,25 @@ int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu, ret = wait_event_interruptible_timeout(gpu->fence_event, !is_active(etnaviv_obj), remaining); - if (ret > 0) { - struct etnaviv_drm_private *priv = gpu->drm->dev_private; - - /* Synchronise with the retire worker */ - flush_workqueue(priv->wq); + if (ret > 0) return 0; - } else if (ret == -ERESTARTSYS) { + else if (ret == -ERESTARTSYS) return -ERESTARTSYS; - } else { + else return -ETIMEDOUT; - } -} - -int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu) -{ - return pm_runtime_get_sync(gpu->dev); -} - -void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu) -{ - pm_runtime_mark_last_busy(gpu->dev); - pm_runtime_put_autosuspend(gpu->dev); } static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu, struct etnaviv_event *event, unsigned int flags) { - const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf; + const struct etnaviv_gem_submit *submit = event->submit; unsigned int i; - for (i = 0; i < cmdbuf->nr_pmrs; i++) { - const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i; + for (i = 0; i < submit->nr_pmrs; i++) { + const struct etnaviv_perfmon_request *pmr = submit->pmrs + i; if (pmr->flags == flags) - etnaviv_perfmon_process(gpu, pmr); + etnaviv_perfmon_process(gpu, pmr, submit->exec_state); } } @@ -1354,14 +1318,14 @@ static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu, static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, struct etnaviv_event *event) { - const struct etnaviv_cmdbuf *cmdbuf = event->cmdbuf; + const struct etnaviv_gem_submit *submit = event->submit; unsigned int i; u32 val; sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST); - for (i = 0; i < cmdbuf->nr_pmrs; i++) { - const struct etnaviv_perfmon_request *pmr = cmdbuf->pmrs + i; + for (i = 0; i < submit->nr_pmrs; i++) { + const struct etnaviv_perfmon_request *pmr = submit->pmrs + i; *pmr->bo_vma = pmr->sequence; } @@ -1380,24 +1344,15 @@ static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, /* add bo's to gpu's ring, and kick gpu: */ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, - struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf) + struct etnaviv_gem_submit *submit) { - struct dma_fence *fence; unsigned int i, nr_events = 1, event[3]; int ret; - ret = etnaviv_gpu_pm_get_sync(gpu); + ret = pm_runtime_get_sync(gpu->dev); if (ret < 0) return ret; - - /* - * TODO - * - * - flush - * - data endian - * - prefetch - * - */ + submit->runtime_resumed = true; /* * if there are performance monitor requests we need to have @@ -1406,19 +1361,19 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, * - a sync point to re-configure gpu, process ETNA_PM_PROCESS_POST requests * and update the sequence number for userspace. */ - if (cmdbuf->nr_pmrs) + if (submit->nr_pmrs) nr_events = 3; ret = event_alloc(gpu, nr_events, event); if (ret) { DRM_ERROR("no free events\n"); - goto out_pm_put; + return ret; } mutex_lock(&gpu->lock); - fence = etnaviv_gpu_fence_alloc(gpu); - if (!fence) { + submit->out_fence = etnaviv_gpu_fence_alloc(gpu); + if (!submit->out_fence) { for (i = 0; i < nr_events; i++) event_free(gpu, event[i]); @@ -1426,80 +1381,51 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, goto out_unlock; } - gpu->event[event[0]].fence = fence; - submit->fence = dma_fence_get(fence); - gpu->active_fence = submit->fence->seqno; + gpu->active_fence = submit->out_fence->seqno; - if (gpu->lastctx != cmdbuf->ctx) { - gpu->mmu->need_flush = true; - gpu->switch_context = true; - gpu->lastctx = cmdbuf->ctx; - } - - if (cmdbuf->nr_pmrs) { + if (submit->nr_pmrs) { gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre; - gpu->event[event[1]].cmdbuf = cmdbuf; + kref_get(&submit->refcount); + gpu->event[event[1]].submit = submit; etnaviv_sync_point_queue(gpu, event[1]); } - etnaviv_buffer_queue(gpu, event[0], cmdbuf); + kref_get(&submit->refcount); + gpu->event[event[0]].fence = submit->out_fence; + etnaviv_buffer_queue(gpu, submit->exec_state, event[0], + &submit->cmdbuf); - if (cmdbuf->nr_pmrs) { + if (submit->nr_pmrs) { gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post; - gpu->event[event[2]].cmdbuf = cmdbuf; + kref_get(&submit->refcount); + gpu->event[event[2]].submit = submit; etnaviv_sync_point_queue(gpu, event[2]); } - cmdbuf->fence = fence; - list_add_tail(&cmdbuf->node, &gpu->active_cmd_list); - - /* We're committed to adding this command buffer, hold a PM reference */ - pm_runtime_get_noresume(gpu->dev); - - for (i = 0; i < submit->nr_bos; i++) { - struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + list_add_tail(&submit->node, &gpu->active_submit_list); - /* Each cmdbuf takes a refcount on the mapping */ - etnaviv_gem_mapping_reference(submit->bos[i].mapping); - cmdbuf->bo_map[i] = submit->bos[i].mapping; - atomic_inc(&etnaviv_obj->gpu_active); - - if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE) - reservation_object_add_excl_fence(etnaviv_obj->resv, - fence); - else - reservation_object_add_shared_fence(etnaviv_obj->resv, - fence); - } - cmdbuf->nr_bos = submit->nr_bos; hangcheck_timer_reset(gpu); ret = 0; out_unlock: mutex_unlock(&gpu->lock); -out_pm_put: - etnaviv_gpu_pm_put(gpu); - return ret; } -static void etnaviv_process_sync_point(struct etnaviv_gpu *gpu, - struct etnaviv_event *event) -{ - u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS); - - event->sync_point(gpu, event); - etnaviv_gpu_start_fe(gpu, addr + 2, 2); -} - static void sync_point_worker(struct work_struct *work) { struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu, sync_point_work); + struct etnaviv_event *event = &gpu->event[gpu->sync_point_event]; + u32 addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS); - etnaviv_process_sync_point(gpu, &gpu->event[gpu->sync_point_event]); + event->sync_point(gpu, event); + etnaviv_submit_put(event->submit); event_free(gpu, gpu->sync_point_event); + + /* restart FE last to avoid GPU and IRQ racing against this worker */ + etnaviv_gpu_start_fe(gpu, addr + 2, 2); } /* @@ -1550,7 +1476,7 @@ static irqreturn_t irq_handler(int irq, void *data) if (gpu->event[event].sync_point) { gpu->sync_point_event = event; - etnaviv_queue_work(gpu->drm, &gpu->sync_point_work); + queue_work(gpu->wq, &gpu->sync_point_work); } fence = gpu->event[event].fence; @@ -1576,7 +1502,7 @@ static irqreturn_t irq_handler(int irq, void *data) } /* Retire the buffer objects in a work */ - etnaviv_queue_work(gpu->drm, &gpu->retire_work); + queue_work(gpu->wq, &gpu->retire_work); ret = IRQ_HANDLED; } @@ -1653,9 +1579,11 @@ int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms) static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu) { - if (gpu->buffer) { + if (gpu->buffer.suballoc) { /* Replace the last WAIT with END */ + mutex_lock(&gpu->lock); etnaviv_buffer_end(gpu); + mutex_unlock(&gpu->lock); /* * We know that only the FE is busy here, this should @@ -1680,7 +1608,7 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu) etnaviv_gpu_update_clock(gpu); etnaviv_gpu_hw_init(gpu); - gpu->switch_context = true; + gpu->lastctx = NULL; gpu->exec_state = -1; mutex_unlock(&gpu->lock); @@ -1738,20 +1666,29 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master, struct etnaviv_gpu *gpu = dev_get_drvdata(dev); int ret; - if (IS_ENABLED(CONFIG_THERMAL)) { + if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL)) { gpu->cooling = thermal_of_cooling_device_register(dev->of_node, (char *)dev_name(dev), gpu, &cooling_ops); if (IS_ERR(gpu->cooling)) return PTR_ERR(gpu->cooling); } + gpu->wq = alloc_ordered_workqueue(dev_name(dev), 0); + if (!gpu->wq) { + if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL)) + thermal_cooling_device_unregister(gpu->cooling); + return -ENOMEM; + } + #ifdef CONFIG_PM ret = pm_runtime_get_sync(gpu->dev); #else ret = etnaviv_gpu_clk_enable(gpu); #endif if (ret < 0) { - thermal_cooling_device_unregister(gpu->cooling); + destroy_workqueue(gpu->wq); + if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL)) + thermal_cooling_device_unregister(gpu->cooling); return ret; } @@ -1759,7 +1696,7 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master, gpu->fence_context = dma_fence_context_alloc(1); spin_lock_init(&gpu->fence_spinlock); - INIT_LIST_HEAD(&gpu->active_cmd_list); + INIT_LIST_HEAD(&gpu->active_submit_list); INIT_WORK(&gpu->retire_work, retire_worker); INIT_WORK(&gpu->sync_point_work, sync_point_worker); INIT_WORK(&gpu->recover_work, recover_worker); @@ -1784,6 +1721,9 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master, hangcheck_disable(gpu); + flush_workqueue(gpu->wq); + destroy_workqueue(gpu->wq); + #ifdef CONFIG_PM pm_runtime_get_sync(gpu->dev); pm_runtime_put_sync_suspend(gpu->dev); @@ -1791,10 +1731,8 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master, etnaviv_gpu_hw_suspend(gpu); #endif - if (gpu->buffer) { - etnaviv_cmdbuf_free(gpu->buffer); - gpu->buffer = NULL; - } + if (gpu->buffer.suballoc) + etnaviv_cmdbuf_free(&gpu->buffer); if (gpu->cmdbuf_suballoc) { etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc); @@ -1808,7 +1746,8 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master, gpu->drm = NULL; - thermal_cooling_device_unregister(gpu->cooling); + if (IS_ENABLED(CONFIG_DRM_ETNAVIV_THERMAL)) + thermal_cooling_device_unregister(gpu->cooling); gpu->cooling = NULL; } @@ -1931,7 +1870,7 @@ static int etnaviv_gpu_rpm_resume(struct device *dev) return ret; /* Re-initialise the basic hardware state */ - if (gpu->drm && gpu->buffer) { + if (gpu->drm && gpu->buffer.suballoc) { ret = etnaviv_gpu_hw_resume(gpu); if (ret) { etnaviv_gpu_clk_disable(gpu); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 4f10f147297ab25cb233e0032e3042b862f98442..7623905210dc3ab8e182d939874f612181c0018a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -20,6 +20,7 @@ #include #include +#include "etnaviv_cmdbuf.h" #include "etnaviv_drv.h" struct etnaviv_gem_submit; @@ -89,7 +90,7 @@ struct etnaviv_chip_identity { struct etnaviv_event { struct dma_fence *fence; - struct etnaviv_cmdbuf *cmdbuf; + struct etnaviv_gem_submit *submit; void (*sync_point)(struct etnaviv_gpu *gpu, struct etnaviv_event *event); }; @@ -106,10 +107,10 @@ struct etnaviv_gpu { struct mutex lock; struct etnaviv_chip_identity identity; struct etnaviv_file_private *lastctx; - bool switch_context; + struct workqueue_struct *wq; /* 'ring'-buffer: */ - struct etnaviv_cmdbuf *buffer; + struct etnaviv_cmdbuf buffer; int exec_state; /* bus base address of memory */ @@ -122,7 +123,7 @@ struct etnaviv_gpu { spinlock_t event_spinlock; /* list of currently in-flight command buffers */ - struct list_head active_cmd_list; + struct list_head active_submit_list; u32 idle_mask; @@ -202,7 +203,7 @@ int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu, int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu, struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout); int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, - struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf); + struct etnaviv_gem_submit *submit); int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu); void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu); int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c index 14e24ac6573febea9d7c891f9f45dd7048e1a4e0..7a8c9473174863d74cca9ce3856e7b058f557043 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c @@ -70,9 +70,8 @@ static int __etnaviv_iommu_init(struct etnaviv_iommuv1_domain *etnaviv_domain) return -ENOMEM; } - for (i = 0; i < PT_ENTRIES; i++) - etnaviv_domain->pgtable_cpu[i] = - etnaviv_domain->base.bad_page_dma; + memset32(etnaviv_domain->pgtable_cpu, etnaviv_domain->base.bad_page_dma, + PT_ENTRIES); return 0; } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c index fc60fc8ddbf0198ae0a82b4c4c123bba6fb477fe..1e956e266aa38428efb0fb680656a12c184e1653 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c @@ -229,7 +229,7 @@ void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu) prefetch = etnaviv_buffer_config_mmuv2(gpu, (u32)etnaviv_domain->mtlb_dma, (u32)etnaviv_domain->base.bad_page_dma); - etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(gpu->buffer), + etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(&gpu->buffer), prefetch); etnaviv_gpu_wait_idle(gpu, 100); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index 35074b9447780f5e639f56f3fdf80dc6e29481ec..d113fe06e6b57e7aab168d320d77ad0c3ceae506 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -263,18 +263,16 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu, if (iova < 0x80000000 - sg_dma_len(sgt->sgl)) { mapping->iova = iova; list_add_tail(&mapping->mmu_node, &mmu->mappings); - mutex_unlock(&mmu->lock); - return 0; + ret = 0; + goto unlock; } } node = &mapping->vram_node; ret = etnaviv_iommu_find_iova(mmu, node, etnaviv_obj->base.size); - if (ret < 0) { - mutex_unlock(&mmu->lock); - return ret; - } + if (ret < 0) + goto unlock; mmu->last_iova = node->start + etnaviv_obj->base.size; mapping->iova = node->start; @@ -283,12 +281,12 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu, if (ret < 0) { drm_mm_remove_node(node); - mutex_unlock(&mmu->lock); - return ret; + goto unlock; } list_add_tail(&mapping->mmu_node, &mmu->mappings); mmu->need_flush = true; +unlock: mutex_unlock(&mmu->lock); return ret; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c index 768f5aafdd186f8fb97b5b0d1f957128f98e8e36..26dddfc41aacb5ee43708a7893988d14959350b8 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c @@ -479,9 +479,9 @@ int etnaviv_pm_req_validate(const struct drm_etnaviv_gem_submit_pmr *r, } void etnaviv_perfmon_process(struct etnaviv_gpu *gpu, - const struct etnaviv_perfmon_request *pmr) + const struct etnaviv_perfmon_request *pmr, u32 exec_state) { - const struct etnaviv_pm_domain_meta *meta = &doms_meta[gpu->exec_state]; + const struct etnaviv_pm_domain_meta *meta = &doms_meta[exec_state]; const struct etnaviv_pm_domain *dom; const struct etnaviv_pm_signal *sig; u32 *bo = pmr->bo_vma; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h index 35dce194cb0042eff1349ba06ae3237515a638bb..c1653c64ab6b1e954fca5d70154dc348c05a50be 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h @@ -44,6 +44,6 @@ int etnaviv_pm_req_validate(const struct drm_etnaviv_gem_submit_pmr *r, u32 exec_state); void etnaviv_perfmon_process(struct etnaviv_gpu *gpu, - const struct etnaviv_perfmon_request *pmr); + const struct etnaviv_perfmon_request *pmr, u32 exec_state); #endif /* __ETNAVIV_PERFMON_H__ */ diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 5a7c9d8abd6b70f6893b303c34718b887af13182..735ce47688f9269c8ef2b1c4a220ced80440ca54 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -95,26 +95,21 @@ config DRM_EXYNOS_G2D help Choose this option if you want to use Exynos G2D for DRM. -config DRM_EXYNOS_IPP - bool "Image Post Processor" - help - Choose this option if you want to use IPP feature for DRM. - config DRM_EXYNOS_FIMC bool "FIMC" - depends on DRM_EXYNOS_IPP && MFD_SYSCON + depends on BROKEN && MFD_SYSCON help Choose this option if you want to use Exynos FIMC for DRM. config DRM_EXYNOS_ROTATOR bool "Rotator" - depends on DRM_EXYNOS_IPP + depends on BROKEN help Choose this option if you want to use Exynos Rotator for DRM. config DRM_EXYNOS_GSC bool "GScaler" - depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n + depends on BROKEN && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n help Choose this option if you want to use Exynos GSC for DRM. diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile index bdf4212dde7b9bd97a354d93ac50f539051f29b8..a51c5459bb135bb7b63f3e9b4492833336de949b 100644 --- a/drivers/gpu/drm/exynos/Makefile +++ b/drivers/gpu/drm/exynos/Makefile @@ -18,7 +18,6 @@ exynosdrm-$(CONFIG_DRM_EXYNOS_MIXER) += exynos_mixer.o exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI) += exynos_hdmi.o exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI) += exynos_drm_vidi.o exynosdrm-$(CONFIG_DRM_EXYNOS_G2D) += exynos_drm_g2d.o -exynosdrm-$(CONFIG_DRM_EXYNOS_IPP) += exynos_drm_ipp.o exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC) += exynos_drm_fimc.o exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR) += exynos_drm_rotator.o exynosdrm-$(CONFIG_DRM_EXYNOS_GSC) += exynos_drm_gsc.o diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 6be5b53c3b279f42ee6dbcc445173fa539721ce5..1c330f2a7a5d11b09b3dbdf4a86cc4839788aa9a 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -21,13 +21,12 @@ #include #include -#include