diff --git a/Documentation/Makefile b/Documentation/Makefile index a42320385df34eed57b2ece17e9f859ae1a92a2d..85f7856f009203d9e7d15487e452a8b9771c3784 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -22,6 +22,8 @@ ifeq ($(HAVE_SPHINX),0) .DEFAULT: $(warning The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed and in PATH, or set the SPHINXBUILD make variable to point to the full path of the '$(SPHINXBUILD)' executable.) + @echo + @./scripts/sphinx-pre-install @echo " SKIP Sphinx $@ target." else # HAVE_SPHINX @@ -95,16 +97,6 @@ endif # HAVE_SPHINX # The following targets are independent of HAVE_SPHINX, and the rules should # work or silently pass without Sphinx. -# no-ops for the Sphinx toolchain -sgmldocs: - @: -psdocs: - @: -mandocs: - @: -installmandocs: - @: - cleandocs: $(Q)rm -rf $(BUILDDIR) $(Q)$(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media clean diff --git a/Documentation/arm/firmware.txt b/Documentation/arm/firmware.txt index da6713adac8acffc5924d447473351ab84c2d289..7f175dbb427e631acafdb7b72d07023c3eb8e540 100644 --- a/Documentation/arm/firmware.txt +++ b/Documentation/arm/firmware.txt @@ -60,7 +60,7 @@ Example of using a firmware operation: /* some platform code, e.g. SMP initialization */ - __raw_writel(virt_to_phys(exynos4_secondary_startup), + __raw_writel(__pa_symbol(exynos4_secondary_startup), CPU1_BOOT_REG); /* Call Exynos specific smc call */ diff --git a/Documentation/conf.py b/Documentation/conf.py index 71b032bb44fd14b772512d9f2769cc8c47569389..f9054ab60cb1117b12f0f15c07a8199bb3a0ab37 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -29,7 +29,7 @@ from load_config import loadConfig # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -needs_sphinx = '1.2' +needs_sphinx = '1.3' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -344,8 +344,8 @@ if major == 1 and minor > 3: if major == 1 and minor <= 4: latex_elements['preamble'] += '\\usepackage[margin=0.5in, top=1in, bottom=1in]{geometry}' elif major == 1 and (minor > 5 or (minor == 5 and patch >= 3)): - latex_elements['sphinxsetup'] = 'hmargin=0.5in, vmargin=0.5in' - + latex_elements['sphinxsetup'] = 'hmargin=0.5in, vmargin=1in' + latex_elements['preamble'] += '\\fvset{fontsize=auto}\n' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, diff --git a/Documentation/core-api/genalloc.rst b/Documentation/core-api/genalloc.rst new file mode 100644 index 0000000000000000000000000000000000000000..6b38a39fab24c5c7b901fa4a5ac6d2c93d8f37e0 --- /dev/null +++ b/Documentation/core-api/genalloc.rst @@ -0,0 +1,144 @@ +The genalloc/genpool subsystem +============================== + +There are a number of memory-allocation subsystems in the kernel, each +aimed at a specific need. Sometimes, however, a kernel developer needs to +implement a new allocator for a specific range of special-purpose memory; +often that memory is located on a device somewhere. The author of the +driver for that device can certainly write a little allocator to get the +job done, but that is the way to fill the kernel with dozens of poorly +tested allocators. Back in 2005, Jes Sorensen lifted one of those +allocators from the sym53c8xx_2 driver and posted_ it as a generic module +for the creation of ad hoc memory allocators. This code was merged +for the 2.6.13 release; it has been modified considerably since then. + +.. _posted: https://lwn.net/Articles/125842/ + +Code using this allocator should include . The action +begins with the creation of a pool using one of: + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_create + +.. kernel-doc:: lib/genalloc.c + :functions: devm_gen_pool_create + +A call to :c:func:`gen_pool_create` will create a pool. The granularity of +allocations is set with min_alloc_order; it is a log-base-2 number like +those used by the page allocator, but it refers to bytes rather than pages. +So, if min_alloc_order is passed as 3, then all allocations will be a +multiple of eight bytes. Increasing min_alloc_order decreases the memory +required to track the memory in the pool. The nid parameter specifies +which NUMA node should be used for the allocation of the housekeeping +structures; it can be -1 if the caller doesn't care. + +The "managed" interface :c:func:`devm_gen_pool_create` ties the pool to a +specific device. Among other things, it will automatically clean up the +pool when the given device is destroyed. + +A pool is shut down with: + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_destroy + +It's worth noting that, if there are still allocations outstanding from the +given pool, this function will take the rather extreme step of invoking +BUG(), crashing the entire system. You have been warned. + +A freshly created pool has no memory to allocate. It is fairly useless in +that state, so one of the first orders of business is usually to add memory +to the pool. That can be done with one of: + +.. kernel-doc:: include/linux/genalloc.h + :functions: gen_pool_add + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_add_virt + +A call to :c:func:`gen_pool_add` will place the size bytes of memory +starting at addr (in the kernel's virtual address space) into the given +pool, once again using nid as the node ID for ancillary memory allocations. +The :c:func:`gen_pool_add_virt` variant associates an explicit physical +address with the memory; this is only necessary if the pool will be used +for DMA allocations. + +The functions for allocating memory from the pool (and putting it back) +are: + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_alloc + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_dma_alloc + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_free + +As one would expect, :c:func:`gen_pool_alloc` will allocate size< bytes +from the given pool. The :c:func:`gen_pool_dma_alloc` variant allocates +memory for use with DMA operations, returning the associated physical +address in the space pointed to by dma. This will only work if the memory +was added with :c:func:`gen_pool_add_virt`. Note that this function +departs from the usual genpool pattern of using unsigned long values to +represent kernel addresses; it returns a void * instead. + +That all seems relatively simple; indeed, some developers clearly found it +to be too simple. After all, the interface above provides no control over +how the allocation functions choose which specific piece of memory to +return. If that sort of control is needed, the following functions will be +of interest: + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_alloc_algo + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_set_algo + +Allocations with :c:func:`gen_pool_alloc_algo` specify an algorithm to be +used to choose the memory to be allocated; the default algorithm can be set +with :c:func:`gen_pool_set_algo`. The data value is passed to the +algorithm; most ignore it, but it is occasionally needed. One can, +naturally, write a special-purpose algorithm, but there is a fair set +already available: + +- gen_pool_first_fit is a simple first-fit allocator; this is the default + algorithm if none other has been specified. + +- gen_pool_first_fit_align forces the allocation to have a specific + alignment (passed via data in a genpool_data_align structure). + +- gen_pool_first_fit_order_align aligns the allocation to the order of the + size. A 60-byte allocation will thus be 64-byte aligned, for example. + +- gen_pool_best_fit, as one would expect, is a simple best-fit allocator. + +- gen_pool_fixed_alloc allocates at a specific offset (passed in a + genpool_data_fixed structure via the data parameter) within the pool. + If the indicated memory is not available the allocation fails. + +There is a handful of other functions, mostly for purposes like querying +the space available in the pool or iterating through chunks of memory. +Most users, however, should not need much beyond what has been described +above. With luck, wider awareness of this module will help to prevent the +writing of special-purpose memory allocators in the future. + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_virt_to_phys + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_for_each_chunk + +.. kernel-doc:: lib/genalloc.c + :functions: addr_in_gen_pool + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_avail + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_size + +.. kernel-doc:: lib/genalloc.c + :functions: gen_pool_get + +.. kernel-doc:: lib/genalloc.c + :functions: of_gen_pool_get diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index 0606be3a3111819de772c3ce13ba61c0915898e5..d5bbe035316d5b69ffc8fbf3bb5465323ae68339 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -20,6 +20,7 @@ Core utilities genericirq flexible-arrays librs + genalloc Interfaces for kernel debugging =============================== diff --git a/Documentation/dev-tools/gdb-kernel-debugging.rst b/Documentation/dev-tools/gdb-kernel-debugging.rst index 5e93c9bc6619da5d75a75179d858d60004e6f843..19df79286f000cac9ef12e1241b496dcce96dbe6 100644 --- a/Documentation/dev-tools/gdb-kernel-debugging.rst +++ b/Documentation/dev-tools/gdb-kernel-debugging.rst @@ -31,11 +31,13 @@ Setup CONFIG_DEBUG_INFO_REDUCED off. If your architecture supports CONFIG_FRAME_POINTER, keep it enabled. -- Install that kernel on the guest. +- Install that kernel on the guest, turn off KASLR if necessary by adding + "nokaslr" to the kernel command line. Alternatively, QEMU allows to boot the kernel directly using -kernel, -append, -initrd command line switches. This is generally only useful if you do not depend on modules. See QEMU documentation for more details on - this mode. + this mode. In this case, you should build the kernel with + CONFIG_RANDOMIZE_BASE disabled if the architecture supports KASLR. - Enable the gdb stub of QEMU/KVM, either diff --git a/Documentation/dev-tools/kgdb.rst b/Documentation/dev-tools/kgdb.rst index 75273203a35a25bcab3e4b90e4b368941fdc2b2a..d38be58f872af6511a5d9568d586fff09e7c504d 100644 --- a/Documentation/dev-tools/kgdb.rst +++ b/Documentation/dev-tools/kgdb.rst @@ -348,6 +348,15 @@ default behavior is always set to 0. - ``echo 1 > /sys/module/debug_core/parameters/kgdbreboot`` - Enter the debugger on reboot notify. +Kernel parameter: ``nokaslr`` +----------------------------- + +If the architecture that you are using enable KASLR by default, +you should consider turning it off. KASLR randomizes the +virtual address where the kernel image is mapped and confuse +gdb which resolve kernel symbol address from symbol table +of vmlinux. + Using kdb ========= @@ -358,7 +367,7 @@ This is a quick example of how to use kdb. 1. Configure kgdboc at boot using kernel parameters:: - console=ttyS0,115200 kgdboc=ttyS0,115200 + console=ttyS0,115200 kgdboc=ttyS0,115200 nokaslr OR diff --git a/Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt b/Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt new file mode 100644 index 0000000000000000000000000000000000000000..b13adf30b8d3b4d21322a91baa8912bafc0083a1 --- /dev/null +++ b/Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt @@ -0,0 +1,32 @@ +Synopsys DesignWare MIPI DSI host controller +============================================ + +This document defines device tree properties for the Synopsys DesignWare MIPI +DSI host controller. It doesn't constitue a device tree binding specification +by itself but is meant to be referenced by platform-specific device tree +bindings. + +When referenced from platform device tree bindings the properties defined in +this document are defined as follows. The platform device tree bindings are +responsible for defining whether each optional property is used or not. + +- reg: Memory mapped base address and length of the DesignWare MIPI DSI + host controller registers. (mandatory) + +- clocks: References to all the clocks specified in the clock-names property + as specified in [1]. (mandatory) + +- clock-names: + - "pclk" is the peripheral clock for either AHB and APB. (mandatory) + - "px_clk" is the pixel clock for the DPI/RGB input. (optional) + +- resets: References to all the resets specified in the reset-names property + as specified in [2]. (optional) + +- reset-names: string reset name, must be "apb" if used. (optional) + +- panel or bridge node: see [3]. (mandatory) + +[1] Documentation/devicetree/bindings/clock/clock-bindings.txt +[2] Documentation/devicetree/bindings/reset/reset.txt +[3] Documentation/devicetree/bindings/display/mipi-dsi-bus.txt diff --git a/Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt b/Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt index 549c538b38a5b7a9e31b8366a954d1a79d57edb6..fc2588292a68173347521e87ce80b9fa1b9ea19a 100644 --- a/Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt +++ b/Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt @@ -25,12 +25,6 @@ Required properties: size-cells must 1 and 0, respectively. - port: contains an endpoint node which is connected to the endpoint in the mic node. The reg value muset be 0. -- i80-if-timings: specify whether the panel which is connected to decon uses - i80 lcd interface or mipi video interface. This node contains - no timing information as that of fimd does. Because there is - no register in decon to specify i80 interface timing value, - it is not needed, but make it remain to use same kind of node - in fimd and exynos7 decon. Example: SoC specific DT entry: @@ -59,9 +53,3 @@ decon: decon@13800000 { }; }; }; - -Board specific DT entry: -&decon { - i80-if-timings { - }; -}; diff --git a/Documentation/devicetree/bindings/display/repaper.txt b/Documentation/devicetree/bindings/display/repaper.txt new file mode 100644 index 0000000000000000000000000000000000000000..f5f9f9cf6a25bf5ad4ed3022a4c6d71e45b66225 --- /dev/null +++ b/Documentation/devicetree/bindings/display/repaper.txt @@ -0,0 +1,52 @@ +Pervasive Displays RePaper branded e-ink displays + +Required properties: +- compatible: "pervasive,e1144cs021" for 1.44" display + "pervasive,e1190cs021" for 1.9" display + "pervasive,e2200cs021" for 2.0" display + "pervasive,e2271cs021" for 2.7" display + +- panel-on-gpios: Timing controller power control +- discharge-gpios: Discharge control +- reset-gpios: RESET pin +- busy-gpios: BUSY pin + +Required property for e2271cs021: +- border-gpios: Border control + +The node for this driver must be a child node of a SPI controller, hence +all mandatory properties described in ../spi/spi-bus.txt must be specified. + +Optional property: +- pervasive,thermal-zone: name of thermometer's thermal zone + +Example: + + display_temp: lm75@48 { + compatible = "lm75b"; + reg = <0x48>; + #thermal-sensor-cells = <0>; + }; + + thermal-zones { + display { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&display_temp>; + }; + }; + + papirus27@0{ + compatible = "pervasive,e2271cs021"; + reg = <0>; + + spi-max-frequency = <8000000>; + + panel-on-gpios = <&gpio 23 0>; + border-gpios = <&gpio 14 0>; + discharge-gpios = <&gpio 15 0>; + reset-gpios = <&gpio 24 0>; + busy-gpios = <&gpio 25 0>; + + pervasive,thermal-zone = "display"; + }; diff --git a/Documentation/devicetree/bindings/display/rockchip/dw_hdmi-rockchip.txt b/Documentation/devicetree/bindings/display/rockchip/dw_hdmi-rockchip.txt index 046076c6b2775b8124da53ce4647094fe7e3c96c..fad8b7619647661cac9de0df3a8bf1d0f1711074 100644 --- a/Documentation/devicetree/bindings/display/rockchip/dw_hdmi-rockchip.txt +++ b/Documentation/devicetree/bindings/display/rockchip/dw_hdmi-rockchip.txt @@ -11,7 +11,9 @@ following device-specific properties. Required properties: -- compatible: Shall contain "rockchip,rk3288-dw-hdmi". +- compatible: should be one of the following: + "rockchip,rk3288-dw-hdmi" + "rockchip,rk3399-dw-hdmi" - reg: See dw_hdmi.txt. - reg-io-width: See dw_hdmi.txt. Shall be 4. - interrupts: HDMI interrupt number @@ -30,7 +32,8 @@ Optional properties I2C master controller. - clock-names: See dw_hdmi.txt. The "cec" clock is optional. - clock-names: May contain "cec" as defined in dw_hdmi.txt. - +- clock-names: May contain "grf", power for grf io. +- clock-names: May contain "vpll", external clock for some hdmi phy. Example: diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt index 9eb3f0a2a078b4e1f62d71daf123cf7d8363b683..5d835d9c1ba807540d1bc065640513ab092dea42 100644 --- a/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt +++ b/Documentation/devicetree/bindings/display/rockchip/rockchip-vop.txt @@ -8,8 +8,12 @@ Required properties: - compatible: value should be one of the following "rockchip,rk3036-vop"; "rockchip,rk3288-vop"; + "rockchip,rk3368-vop"; + "rockchip,rk3366-vop"; "rockchip,rk3399-vop-big"; "rockchip,rk3399-vop-lit"; + "rockchip,rk3228-vop"; + "rockchip,rk3328-vop"; - interrupts: should contain a list of all VOP IP block interrupts in the order: VSYNC, LCD_SYSTEM. The interrupt specifier diff --git a/Documentation/devicetree/bindings/display/sitronix,st7586.txt b/Documentation/devicetree/bindings/display/sitronix,st7586.txt new file mode 100644 index 0000000000000000000000000000000000000000..1d0dad1210d380849370738dbfb6a7b0e07773e8 --- /dev/null +++ b/Documentation/devicetree/bindings/display/sitronix,st7586.txt @@ -0,0 +1,22 @@ +Sitronix ST7586 display panel + +Required properties: +- compatible: "lego,ev3-lcd". +- a0-gpios: The A0 signal (since this binding is for serial mode, this is + the pin labeled D1 on the controller, not the pin labeled A0) +- reset-gpios: Reset pin + +The node for this driver must be a child node of a SPI controller, hence +all mandatory properties described in ../spi/spi-bus.txt must be specified. + +Optional properties: +- rotation: panel rotation in degrees counter clockwise (0,90,180,270) + +Example: + display@0{ + compatible = "lego,ev3-lcd"; + reg = <0>; + spi-max-frequency = <10000000>; + a0-gpios = <&gpio 43 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio 80 GPIO_ACTIVE_HIGH>; + }; diff --git a/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt b/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt index 8e1476941c0ff86b4ae4d032975f60e45616346d..74b5ac7b26d65278e7453301be2b8491e1c6668c 100644 --- a/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt +++ b/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt @@ -1,7 +1,6 @@ * STMicroelectronics STM32 lcd-tft display controller - ltdc: lcd-tft display controller host - must be a sub-node of st-display-subsystem Required properties: - compatible: "st,stm32-ltdc" - reg: Physical base address of the IP registers and length of memory mapped region. @@ -13,8 +12,40 @@ Required nodes: - Video port for RGB output. -Example: +* STMicroelectronics STM32 DSI controller specific extensions to Synopsys + DesignWare MIPI DSI host controller +The STMicroelectronics STM32 DSI controller uses the Synopsys DesignWare MIPI +DSI host controller. For all mandatory properties & nodes, please refer +to the related documentation in [5]. + +Mandatory properties specific to STM32 DSI: +- #address-cells: Should be <1>. +- #size-cells: Should be <0>. +- compatible: "st,stm32-dsi". +- clock-names: + - phy pll reference clock string name, must be "ref". +- resets: see [5]. +- reset-names: see [5]. + +Mandatory nodes specific to STM32 DSI: +- ports: A node containing DSI input & output port nodes with endpoint + definitions as documented in [3] & [4]. + - port@0: DSI input port node, connected to the ltdc rgb output port. + - port@1: DSI output port node, connected to a panel or a bridge input port. +- panel or bridge node: A node containing the panel or bridge description as + documented in [6]. + - port: panel or bridge port node, connected to the DSI output port (port@1). + +Note: You can find more documentation in the following references +[1] Documentation/devicetree/bindings/clock/clock-bindings.txt +[2] Documentation/devicetree/bindings/reset/reset.txt +[3] Documentation/devicetree/bindings/media/video-interfaces.txt +[4] Documentation/devicetree/bindings/graph.txt +[5] Documentation/devicetree/bindings/display/bridge/dw_mipi_dsi.txt +[6] Documentation/devicetree/bindings/display/mipi-dsi-bus.txt + +Example 1: RGB panel / { ... soc { @@ -34,3 +65,73 @@ Example: }; }; }; + +Example 2: DSI panel + +/ { + ... + soc { + ... + ltdc: display-controller@40016800 { + compatible = "st,stm32-ltdc"; + reg = <0x40016800 0x200>; + interrupts = <88>, <89>; + resets = <&rcc STM32F4_APB2_RESET(LTDC)>; + clocks = <&rcc 1 CLK_LCD>; + clock-names = "lcd"; + + port { + ltdc_out_dsi: endpoint { + remote-endpoint = <&dsi_in>; + }; + }; + }; + + + dsi: dsi@40016c00 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "st,stm32-dsi"; + reg = <0x40016c00 0x800>; + clocks = <&rcc 1 CLK_F469_DSI>, <&clk_hse>; + clock-names = "ref", "pclk"; + resets = <&rcc STM32F4_APB2_RESET(DSI)>; + reset-names = "apb"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + dsi_in: endpoint { + remote-endpoint = <<dc_out_dsi>; + }; + }; + + port@1 { + reg = <1>; + dsi_out: endpoint { + remote-endpoint = <&dsi_in_panel>; + }; + }; + + }; + + panel-dsi@0 { + reg = <0>; /* dsi virtual channel (0..3) */ + compatible = ...; + enable-gpios = ...; + + port { + dsi_in_panel: endpoint { + remote-endpoint = <&dsi_out>; + }; + }; + + }; + + }; + + }; +}; diff --git a/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt b/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt index b83e6018041d68d8fb0b552c3984233dab5b6d84..2ee6ff0ef98e7dcf6d16e977fa8dd852b10a247c 100644 --- a/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt +++ b/Documentation/devicetree/bindings/display/sunxi/sun4i-drm.txt @@ -4,15 +4,33 @@ Allwinner A10 Display Pipeline The Allwinner A10 Display pipeline is composed of several components that are going to be documented below: -For the input port of all components up to the TCON in the display -pipeline, if there are multiple components, the local endpoint IDs -must correspond to the index of the upstream block. For example, if -the remote endpoint is Frontend 1, then the local endpoint ID must -be 1. - -Conversely, for the output ports of the same group, the remote endpoint -ID must be the index of the local hardware block. If the local backend -is backend 1, then the remote endpoint ID must be 1. +For all connections between components up to the TCONs in the display +pipeline, when there are multiple components of the same type at the +same depth, the local endpoint ID must be the same as the remote +component's index. For example, if the remote endpoint is Frontend 1, +then the local endpoint ID must be 1. + + Frontend 0 [0] ------- [0] Backend 0 [0] ------- [0] TCON 0 + [1] -- -- [1] [1] -- -- [1] + \ / \ / + X X + / \ / \ + [0] -- -- [0] [0] -- -- [0] + Frontend 1 [1] ------- [1] Backend 1 [1] ------- [1] TCON 1 + +For a two pipeline system such as the one depicted above, the lines +represent the connections between the components, while the numbers +within the square brackets corresponds to the ID of the local endpoint. + +The same rule also applies to DE 2.0 mixer-TCON connections: + + Mixer 0 [0] ----------- [0] TCON 0 + [1] ---- ---- [1] + \ / + X + / \ + [0] ---- ---- [0] + Mixer 1 [1] ----------- [1] TCON 1 HDMI Encoder ------------ diff --git a/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt b/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt index cf4460564adb20941f127008fdb0ec235dc5f7ff..367c8203213b198bd9bf3deeaa4389b430951475 100644 --- a/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt +++ b/Documentation/devicetree/bindings/hwmon/aspeed-pwm-tacho.txt @@ -11,6 +11,8 @@ Required properties for pwm-tacho node: - #size-cells : should be 1. +- #cooling-cells: should be 2. + - reg : address and length of the register set for the device. - pinctrl-names : a pinctrl state named "default" must be defined. @@ -28,12 +30,17 @@ fan subnode format: Under fan subnode there can upto 8 child nodes, with each child node representing a fan. If there are 8 fans each fan can have one PWM port and one/two Fan tach inputs. +For PWM port can be configured cooling-levels to create cooling device. +Cooling device could be bound to a thermal zone for the thermal control. Required properties for each child node: - reg : should specify PWM source port. integer value in the range 0 to 7 with 0 indicating PWM port A and 7 indicating PWM port H. +- cooling-levels: PWM duty cycle values in a range from 0 to 255 + which correspond to thermal cooling states. + - aspeed,fan-tach-ch : should specify the Fan tach input channel. integer value in the range 0 through 15, with 0 indicating Fan tach channel 0 and 15 indicating Fan tach channel 15. @@ -50,6 +57,7 @@ pwm_tacho_fixed_clk: fixedclk { pwm_tacho: pwmtachocontroller@1e786000 { #address-cells = <1>; #size-cells = <1>; + #cooling-cells = <2>; reg = <0x1E786000 0x1000>; compatible = "aspeed,ast2500-pwm-tacho"; clocks = <&pwm_tacho_fixed_clk>; @@ -58,6 +66,7 @@ pwm_tacho: pwmtachocontroller@1e786000 { fan@0 { reg = <0x00>; + cooling-levels = /bits/ 8 <125 151 177 203 229 255>; aspeed,fan-tach-ch = /bits/ 8 <0x00>; }; diff --git a/Documentation/devicetree/bindings/hwmon/ibm,cffps1.txt b/Documentation/devicetree/bindings/hwmon/ibm,cffps1.txt new file mode 100644 index 0000000000000000000000000000000000000000..f68a0a68fc524cdc1ee547313892a34e78248800 --- /dev/null +++ b/Documentation/devicetree/bindings/hwmon/ibm,cffps1.txt @@ -0,0 +1,21 @@ +Device-tree bindings for IBM Common Form Factor Power Supply Version 1 +---------------------------------------------------------------------- + +Required properties: + - compatible = "ibm,cffps1"; + - reg = < I2C bus address >; : Address of the power supply on the + I2C bus. + +Example: + + i2c-bus@100 { + #address-cells = <1>; + #size-cells = <0>; + #interrupt-cells = <1>; + < more properties > + + power-supply@68 { + compatible = "ibm,cffps1"; + reg = <0x68>; + }; + }; diff --git a/Documentation/devicetree/bindings/hwmon/ltq-cputemp.txt b/Documentation/devicetree/bindings/hwmon/ltq-cputemp.txt new file mode 100644 index 0000000000000000000000000000000000000000..33fd00a987c7d924cd7b1b67970c2c6ee9100b79 --- /dev/null +++ b/Documentation/devicetree/bindings/hwmon/ltq-cputemp.txt @@ -0,0 +1,10 @@ +Lantiq cpu temperatur sensor + +Requires node properties: +- compatible value : + "lantiq,cputemp" + +Example: + cputemp@0 { + compatible = "lantiq,cputemp"; + }; diff --git a/Documentation/devicetree/bindings/net/dwmac-sun8i.txt b/Documentation/devicetree/bindings/net/dwmac-sun8i.txt deleted file mode 100644 index 725f3b18788614f2d7bd8abdb2201d5d216ed7be..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/net/dwmac-sun8i.txt +++ /dev/null @@ -1,84 +0,0 @@ -* Allwinner sun8i GMAC ethernet controller - -This device is a platform glue layer for stmmac. -Please see stmmac.txt for the other unchanged properties. - -Required properties: -- compatible: should be one of the following string: - "allwinner,sun8i-a83t-emac" - "allwinner,sun8i-h3-emac" - "allwinner,sun8i-v3s-emac" - "allwinner,sun50i-a64-emac" -- reg: address and length of the register for the device. -- interrupts: interrupt for the device -- interrupt-names: should be "macirq" -- clocks: A phandle to the reference clock for this device -- clock-names: should be "stmmaceth" -- resets: A phandle to the reset control for this device -- reset-names: should be "stmmaceth" -- phy-mode: See ethernet.txt -- phy-handle: See ethernet.txt -- #address-cells: shall be 1 -- #size-cells: shall be 0 -- syscon: A phandle to the syscon of the SoC with one of the following - compatible string: - - allwinner,sun8i-h3-system-controller - - allwinner,sun8i-v3s-system-controller - - allwinner,sun50i-a64-system-controller - - allwinner,sun8i-a83t-system-controller - -Optional properties: -- allwinner,tx-delay-ps: TX clock delay chain value in ps. Range value is 0-700. Default is 0) -- allwinner,rx-delay-ps: RX clock delay chain value in ps. Range value is 0-3100. Default is 0) -Both delay properties need to be a multiple of 100. They control the delay for -external PHY. - -Optional properties for the following compatibles: - - "allwinner,sun8i-h3-emac", - - "allwinner,sun8i-v3s-emac": -- allwinner,leds-active-low: EPHY LEDs are active low - -Required child node of emac: -- mdio bus node: should be named mdio - -Required properties of the mdio node: -- #address-cells: shall be 1 -- #size-cells: shall be 0 - -The device node referenced by "phy" or "phy-handle" should be a child node -of the mdio node. See phy.txt for the generic PHY bindings. - -Required properties of the phy node with the following compatibles: - - "allwinner,sun8i-h3-emac", - - "allwinner,sun8i-v3s-emac": -- clocks: a phandle to the reference clock for the EPHY -- resets: a phandle to the reset control for the EPHY - -Example: - -emac: ethernet@1c0b000 { - compatible = "allwinner,sun8i-h3-emac"; - syscon = <&syscon>; - reg = <0x01c0b000 0x104>; - interrupts = ; - interrupt-names = "macirq"; - resets = <&ccu RST_BUS_EMAC>; - reset-names = "stmmaceth"; - clocks = <&ccu CLK_BUS_EMAC>; - clock-names = "stmmaceth"; - #address-cells = <1>; - #size-cells = <0>; - - phy-handle = <&int_mii_phy>; - phy-mode = "mii"; - allwinner,leds-active-low; - mdio: mdio { - #address-cells = <1>; - #size-cells = <0>; - int_mii_phy: ethernet-phy@1 { - reg = <1>; - clocks = <&ccu CLK_BUS_EPHY>; - resets = <&ccu RST_BUS_EPHY>; - }; - }; -}; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index daf465bef75898c1b1960d456c82b666c4f9dca6..36b27b1efec675ef38a1fece766ee3c0ef0bd07f 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -249,6 +249,7 @@ oxsemi Oxford Semiconductor, Ltd. panasonic Panasonic Corporation parade Parade Technologies Inc. pericom Pericom Technology Inc. +pervasive Pervasive Displays, Inc. phytec PHYTEC Messtechnik GmbH picochip Picochip Ltd pine64 Pine64 diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst index 84e8e8a9cbdb0431ef2e67fca56b39475a633e5e..a2417633fdd82205566cffee6fdd13998e9338b5 100644 --- a/Documentation/doc-guide/sphinx.rst +++ b/Documentation/doc-guide/sphinx.rst @@ -19,6 +19,110 @@ Finally, there are thousands of plain text documentation files scattered around ``Documentation``. Some of these will likely be converted to reStructuredText over time, but the bulk of them will remain in plain text. +.. _sphinx_install: + +Sphinx Install +============== + +The ReST markups currently used by the Documentation/ files are meant to be +built with ``Sphinx`` version 1.3 or upper. If you're desiring to build +PDF outputs, it is recommended to use version 1.4.6 or upper. + +There's a script that checks for the Spinx requirements. Please see +:ref:`sphinx-pre-install` for further details. + +Most distributions are shipped with Sphinx, but its toolchain is fragile, +and it is not uncommon that upgrading it or some other Python packages +on your machine would cause the documentation build to break. + +A way to get rid of that is to use a different version than the one shipped +on your distributions. In order to do that, it is recommended to install +Sphinx inside a virtual environment, using ``virtualenv-3`` +or ``virtualenv``, depending on how your distribution packaged Python 3. + +.. note:: + + #) Sphinx versions below 1.5 don't work properly with Python's + docutils version 0.13.1 or upper. So, if you're willing to use + those versions, you should run ``pip install 'docutils==0.12'``. + + #) It is recommended to use the RTD theme for html output. Depending + on the Sphinx version, it should be installed in separate, + with ``pip install sphinx_rtd_theme``. + + #) Some ReST pages contain math expressions. Due to the way Sphinx work, + those expressions are written using LaTeX notation. It needs texlive + installed with amdfonts and amsmath in order to evaluate them. + +In summary, if you want to install Sphinx version 1.4.9, you should do:: + + $ virtualenv sphinx_1.4 + $ . sphinx_1.4/bin/activate + (sphinx_1.4) $ pip install -r Documentation/sphinx/requirements.txt + +After running ``. sphinx_1.4/bin/activate``, the prompt will change, +in order to indicate that you're using the new environment. If you +open a new shell, you need to rerun this command to enter again at +the virtual environment before building the documentation. + +Image output +------------ + +The kernel documentation build system contains an extension that +handles images on both GraphViz and SVG formats (see +:ref:`sphinx_kfigure`). + +For it to work, you need to install both GraphViz and ImageMagick +packages. If those packages are not installed, the build system will +still build the documentation, but won't include any images at the +output. + +PDF and LaTeX builds +-------------------- + +Such builds are currently supported only with Sphinx versions 1.4 and upper. + +For PDF and LaTeX output, you'll also need ``XeLaTeX`` version 3.14159265. + +Depending on the distribution, you may also need to install a series of +``texlive`` packages that provide the minimal set of functionalities +required for ``XeLaTeX`` to work. + +.. _sphinx-pre-install: + +Checking for Sphinx dependencies +-------------------------------- + +There's a script that automatically check for Sphinx dependencies. If it can +recognize your distribution, it will also give a hint about the install +command line options for your distro:: + + $ ./scripts/sphinx-pre-install + Checking if the needed tools for Fedora release 26 (Twenty Six) are available + Warning: better to also install "texlive-luatex85". + You should run: + + sudo dnf install -y texlive-luatex85 + /usr/bin/virtualenv sphinx_1.4 + . sphinx_1.4/bin/activate + pip install -r Documentation/sphinx/requirements.txt + + Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468. + +By default, it checks all the requirements for both html and PDF, including +the requirements for images, math expressions and LaTeX build, and assumes +that a virtual Python environment will be used. The ones needed for html +builds are assumed to be mandatory; the others to be optional. + +It supports two optional parameters: + +``--no-pdf`` + Disable checks for PDF; + +``--no-virtualenv`` + Use OS packaging for Sphinx instead of Python virtual environment. + + Sphinx Build ============ @@ -118,7 +222,7 @@ Here are some specific guidelines for the kernel documentation: the C domain ------------ -The `Sphinx C Domain`_ (name c) is suited for documentation of C API. E.g. a +The **Sphinx C Domain** (name c) is suited for documentation of C API. E.g. a function prototype: .. code-block:: rst @@ -229,6 +333,7 @@ Rendered as: - column 3 +.. _sphinx_kfigure: Figures & Images ================ diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst index ab82250c7727309d2aace9284c26316c770c1fb5..73fa7d42bbbaed3b52a52c0d59a2efb778751f66 100644 --- a/Documentation/driver-api/basics.rst +++ b/Documentation/driver-api/basics.rst @@ -4,7 +4,7 @@ Driver Basics Driver Entry and Exit points ---------------------------- -.. kernel-doc:: include/linux/init.h +.. kernel-doc:: include/linux/module.h :internal: Driver device table @@ -103,9 +103,6 @@ Kernel utility functions .. kernel-doc:: kernel/panic.c :export: -.. kernel-doc:: kernel/sys.c - :export: - .. kernel-doc:: kernel/rcu/tree.c :export: diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst index 31671b469627ff2b439f891d32a03b5be02403c1..dc384f2f7f34c7fde3b892d62aefebf87daab3d8 100644 --- a/Documentation/driver-api/dma-buf.rst +++ b/Documentation/driver-api/dma-buf.rst @@ -139,9 +139,6 @@ DMA Fences Seqno Hardware Fences ~~~~~~~~~~~~~~~~~~~~~ -.. kernel-doc:: drivers/dma-buf/seqno-fence.c - :export: - .. kernel-doc:: include/linux/seqno-fence.h :internal: diff --git a/Documentation/driver-api/miscellaneous.rst b/Documentation/driver-api/miscellaneous.rst index 8da7d115bafcae2a81828cda2d27b43872068151..304ffb146cf9c195832cae3a998eb4712910488a 100644 --- a/Documentation/driver-api/miscellaneous.rst +++ b/Documentation/driver-api/miscellaneous.rst @@ -47,4 +47,3 @@ used by one consumer at a time. .. kernel-doc:: drivers/pwm/core.c :export: - diff --git a/Documentation/driver-api/s390-drivers.rst b/Documentation/driver-api/s390-drivers.rst index 7060da1360951b6747b704cb3cf3baf4c0bc4c08..ecf8851d35651161542f65a62c5c1d7ed631ffd8 100644 --- a/Documentation/driver-api/s390-drivers.rst +++ b/Documentation/driver-api/s390-drivers.rst @@ -75,7 +75,7 @@ The channel-measurement facility provides a means to collect measurement data which is made available by the channel subsystem for each channel attached device. -.. kernel-doc:: arch/s390/include/asm/cmb.h +.. kernel-doc:: arch/s390/include/uapi/asm/cmb.h :internal: .. kernel-doc:: drivers/s390/cio/cmf.c diff --git a/Documentation/driver-api/scsi.rst b/Documentation/driver-api/scsi.rst index 859fb672319f9ca728460a1076884cd551c45195..5a2aa7a377d9bd9fe08e5519cfa285d5fa5083f5 100644 --- a/Documentation/driver-api/scsi.rst +++ b/Documentation/driver-api/scsi.rst @@ -224,14 +224,6 @@ mid to lowlevel SCSI driver interface .. kernel-doc:: drivers/scsi/hosts.c :export: -drivers/scsi/constants.c -~~~~~~~~~~~~~~~~~~~~~~~~ - -mid to lowlevel SCSI driver interface - -.. kernel-doc:: drivers/scsi/constants.c - :export: - Transport classes ----------------- diff --git a/Documentation/features/core/tracehook/arch-support.txt b/Documentation/features/core/tracehook/arch-support.txt index 5e97a89420ef09ceaa6c8fb7d89c7ca771d955ef..dfb638c2f8421369ac73a71279024fb9a8587daa 100644 --- a/Documentation/features/core/tracehook/arch-support.txt +++ b/Documentation/features/core/tracehook/arch-support.txt @@ -25,7 +25,7 @@ | mn10300: | ok | | nios2: | ok | | openrisc: | ok | - | parisc: | TODO | + | parisc: | ok | | powerpc: | ok | | s390: | ok | | score: | TODO | diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 73e7d91f03dce56266ad1806e056c2a4f7e5321e..405a3df759b33938dc9b6cc4e819673538f973a1 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -829,9 +829,7 @@ struct address_space_operations { swap_activate: Called when swapon is used on a file to allocate space if necessary and pin the block lookup information in memory. A return value of zero indicates success, - in which case this file can be used to back swapspace. The - swapspace operations will be proxied to this address space's - ->swap_{out,in} methods. + in which case this file can be used to back swapspace. swap_deactivate: Called during swapoff on files where swap_activate was successful. diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-internals.rst index 0d936c67bf7d78a84a38d284681d142ad34031d4..5ee9674fb9e981db2c17c0b6f1a08402b5ab5290 100644 --- a/Documentation/gpu/drm-internals.rst +++ b/Documentation/gpu/drm-internals.rst @@ -201,6 +201,8 @@ drivers. Open/Close, File Operations and IOCTLs ====================================== +.. _drm_driver_fops: + File Operations --------------- diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index 7c5e2549a58a61091b286e88c2171cbc9439668f..13dd237418cc4595159c2037ae2e9bbdfd41f0de 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -296,3 +296,12 @@ Auxiliary Modeset Helpers .. kernel-doc:: drivers/gpu/drm/drm_modeset_helper.c :export: + +Framebuffer GEM Helper Reference +================================ + +.. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c + :doc: overview + +.. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c + :export: diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index 2d77c958016483ca294ffe9a58f5dbd72f2dbd0f..307284125d7aa964522f85ea6fb34247e67c9de2 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -523,9 +523,6 @@ Color Management Properties .. kernel-doc:: drivers/gpu/drm/drm_color_mgmt.c :doc: overview -.. kernel-doc:: include/drm/drm_color_mgmt.h - :internal: - .. kernel-doc:: drivers/gpu/drm/drm_color_mgmt.c :export: @@ -554,60 +551,8 @@ various modules/drivers. Vertical Blanking ================= -Vertical blanking plays a major role in graphics rendering. To achieve -tear-free display, users must synchronize page flips and/or rendering to -vertical blanking. The DRM API offers ioctls to perform page flips -synchronized to vertical blanking and wait for vertical blanking. - -The DRM core handles most of the vertical blanking management logic, -which involves filtering out spurious interrupts, keeping race-free -blanking counters, coping with counter wrap-around and resets and -keeping use counts. It relies on the driver to generate vertical -blanking interrupts and optionally provide a hardware vertical blanking -counter. Drivers must implement the following operations. - -- int (\*enable_vblank) (struct drm_device \*dev, int crtc); void - (\*disable_vblank) (struct drm_device \*dev, int crtc); - Enable or disable vertical blanking interrupts for the given CRTC. - -- u32 (\*get_vblank_counter) (struct drm_device \*dev, int crtc); - Retrieve the value of the vertical blanking counter for the given - CRTC. If the hardware maintains a vertical blanking counter its value - should be returned. Otherwise drivers can use the - :c:func:`drm_vblank_count()` helper function to handle this - operation. - -Drivers must initialize the vertical blanking handling core with a call -to :c:func:`drm_vblank_init()` in their load operation. - -Vertical blanking interrupts can be enabled by the DRM core or by -drivers themselves (for instance to handle page flipping operations). -The DRM core maintains a vertical blanking use count to ensure that the -interrupts are not disabled while a user still needs them. To increment -the use count, drivers call :c:func:`drm_vblank_get()`. Upon -return vertical blanking interrupts are guaranteed to be enabled. - -To decrement the use count drivers call -:c:func:`drm_vblank_put()`. Only when the use count drops to zero -will the DRM core disable the vertical blanking interrupts after a delay -by scheduling a timer. The delay is accessible through the -vblankoffdelay module parameter or the ``drm_vblank_offdelay`` global -variable and expressed in milliseconds. Its default value is 5000 ms. -Zero means never disable, and a negative value means disable -immediately. Drivers may override the behaviour by setting the -:c:type:`struct drm_device ` -vblank_disable_immediate flag, which when set causes vblank interrupts -to be disabled immediately regardless of the drm_vblank_offdelay -value. The flag should only be set if there's a properly working -hardware vblank counter present. - -When a vertical blanking interrupt occurs drivers only need to call the -:c:func:`drm_handle_vblank()` function to account for the -interrupt. - -Resources allocated by :c:func:`drm_vblank_init()` must be freed -with a call to :c:func:`drm_vblank_cleanup()` in the driver unload -operation handler. +.. kernel-doc:: drivers/gpu/drm/drm_vblank.c + :doc: vblank handling Vertical Blanking and Interrupt Handling Functions Reference ------------------------------------------------------------ diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index 9412798645c1ae84e9d3d1a252d4ff9b0b12a1b3..b08e9dcd91771c39ab56216c506ece87ce755059 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -191,7 +191,7 @@ acquired and release by :c:func:`calling drm_gem_object_get()` and holding the lock. When the last reference to a GEM object is released the GEM core calls -the :c:type:`struct drm_driver ` gem_free_object +the :c:type:`struct drm_driver ` gem_free_object_unlocked operation. That operation is mandatory for GEM-enabled drivers and must free the GEM object and all associated resources. @@ -492,7 +492,7 @@ DRM Sync Objects :doc: Overview .. kernel-doc:: include/drm/drm_syncobj.h - :export: + :internal: .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c :export: diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst index 858457567d3df7460ab7074de88d82e24b8fab8e..679373b4a03f2189e14cce4c30686bdc947eb5a1 100644 --- a/Documentation/gpu/drm-uapi.rst +++ b/Documentation/gpu/drm-uapi.rst @@ -160,6 +160,8 @@ other hand, a driver requires shared state between clients which is visible to user-space and accessible beyond open-file boundaries, they cannot support render nodes. +.. _drm_driver_ioctl: + IOCTL Support on Device Nodes ============================= diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 9c7ed3e3f1e94007d3c1bd9aace282f8d8b96eba..2e7ee0313c1cd6c0377d2b4a3f4948641f9a84a2 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -417,6 +417,10 @@ integrate with drm/i915 and to handle the `DRM_I915_PERF_OPEN` ioctl. :functions: i915_perf_open_ioctl .. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c :functions: i915_perf_release +.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c + :functions: i915_perf_add_config_ioctl +.. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c + :functions: i915_perf_remove_config_ioctl i915 Perf Stream ---------------- @@ -477,4 +481,16 @@ specific details than found in the more high-level sections. .. kernel-doc:: drivers/gpu/drm/i915/i915_perf.c :internal: -.. WARNING: DOCPROC directive not supported: !Cdrivers/gpu/drm/i915/i915_irq.c +Style +===== + +The drm/i915 driver codebase has some style rules in addition to (and, in some +cases, deviating from) the kernel coding style. + +Register macro definition style +------------------------------- + +The style guide for ``i915_reg.h``. + +.. kernel-doc:: drivers/gpu/drm/i915/i915_reg.h + :doc: The i915 register macro definition style guide diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 1ae42006deea7814ce88b9ad121cbf56d246714f..22af55d06ab8d6be092cf9d8699a9c4e09cd5260 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -108,8 +108,8 @@ This would be especially useful for tinydrm: crtc state, clear that to the max values, x/y = 0 and w/h = MAX_INT, in __drm_atomic_helper_crtc_duplicate_state(). -- Move tinydrm_merge_clips into drm_framebuffer.c, dropping the tinydrm_ - prefix ofc and using drm_fb_. drm_framebuffer.c makes sense since this +- Move tinydrm_merge_clips into drm_framebuffer.c, dropping the tinydrm\_ + prefix ofc and using drm_fb\_. drm_framebuffer.c makes sense since this is a function useful to implement the fb->dirty function. - Create a new drm_fb_dirty function which does essentially what e.g. diff --git a/Documentation/hwmon/ftsteutates b/Documentation/hwmon/ftsteutates index 8c10a916de20d064a34fb43b121309b190eab98c..af54db92391bab26610150b2f2b20bcc081edc2d 100644 --- a/Documentation/hwmon/ftsteutates +++ b/Documentation/hwmon/ftsteutates @@ -18,6 +18,10 @@ enhancements. It can monitor up to 4 voltages, 16 temperatures and 8 fans. It also contains an integrated watchdog which is currently implemented in this driver. +To clear a temperature or fan alarm, execute the following command with the +correct path to the alarm file: + echo 0 >XXXX_alarm + Specification of the chip can be found here: ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf diff --git a/Documentation/hwmon/ibm-cffps b/Documentation/hwmon/ibm-cffps new file mode 100644 index 0000000000000000000000000000000000000000..e05ecd8ecfcfa54b247233af709d8134f086c92f --- /dev/null +++ b/Documentation/hwmon/ibm-cffps @@ -0,0 +1,54 @@ +Kernel driver ibm-cffps +======================= + +Supported chips: + * IBM Common Form Factor power supply + +Author: Eddie James + +Description +----------- + +This driver supports IBM Common Form Factor (CFF) power supplies. This driver +is a client to the core PMBus driver. + +Usage Notes +----------- + +This driver does not auto-detect devices. You will have to instantiate the +devices explicitly. Please see Documentation/i2c/instantiating-devices for +details. + +Sysfs entries +------------- + +The following attributes are supported: + +curr1_alarm Output current over-current alarm. +curr1_input Measured output current in mA. +curr1_label "iout1" + +fan1_alarm Fan 1 warning. +fan1_fault Fan 1 fault. +fan1_input Fan 1 speed in RPM. +fan2_alarm Fan 2 warning. +fan2_fault Fan 2 fault. +fan2_input Fan 2 speed in RPM. + +in1_alarm Input voltage under-voltage alarm. +in1_input Measured input voltage in mV. +in1_label "vin" +in2_alarm Output voltage over-voltage alarm. +in2_input Measured output voltage in mV. +in2_label "vout1" + +power1_alarm Input fault or alarm. +power1_input Measured input power in uW. +power1_label "pin" + +temp1_alarm PSU inlet ambient temperature over-temperature alarm. +temp1_input Measured PSU inlet ambient temp in millidegrees C. +temp2_alarm Secondary rectifier temp over-temperature alarm. +temp2_input Measured secondary rectifier temp in millidegrees C. +temp3_alarm ORing FET temperature over-temperature alarm. +temp3_input Measured ORing FET temperature in millidegrees C. diff --git a/Documentation/hwmon/lm25066 b/Documentation/hwmon/lm25066 index 2cb20ebb234d4944956512992b1a2d1787c4cbef..3fa6bf820c888ddcd912681a106bfa626acf604d 100644 --- a/Documentation/hwmon/lm25066 +++ b/Documentation/hwmon/lm25066 @@ -29,6 +29,11 @@ Supported chips: Addresses scanned: - Datasheet: http://www.national.com/pf/LM/LM5066.html + * Texas Instruments LM5066I + Prefix: 'lm5066i' + Addresses scanned: - + Datasheet: + http://www.ti.com/product/LM5066I Author: Guenter Roeck @@ -37,8 +42,8 @@ Description ----------- This driver supports hardware monitoring for National Semiconductor / TI LM25056, -LM25063, LM25066, LM5064, and LM5066 Power Management, Monitoring, Control, and -Protection ICs. +LM25063, LM25066, LM5064, and LM5066/LM5066I Power Management, Monitoring, +Control, and Protection ICs. The driver is a client driver to the core PMBus driver. Please see Documentation/hwmon/pmbus for details on PMBus client drivers. diff --git a/Documentation/infiniband/tag_matching.txt b/Documentation/infiniband/tag_matching.txt new file mode 100644 index 0000000000000000000000000000000000000000..d2a3bf81922603ac9d210baa7bee37aab3b9c426 --- /dev/null +++ b/Documentation/infiniband/tag_matching.txt @@ -0,0 +1,64 @@ +Tag matching logic + +The MPI standard defines a set of rules, known as tag-matching, for matching +source send operations to destination receives. The following parameters must +match the following source and destination parameters: +* Communicator +* User tag - wild card may be specified by the receiver +* Source rank – wild car may be specified by the receiver +* Destination rank – wild +The ordering rules require that when more than one pair of send and receive +message envelopes may match, the pair that includes the earliest posted-send +and the earliest posted-receive is the pair that must be used to satisfy the +matching operation. However, this doesn’t imply that tags are consumed in +the order they are created, e.g., a later generated tag may be consumed, if +earlier tags can’t be used to satisfy the matching rules. + +When a message is sent from the sender to the receiver, the communication +library may attempt to process the operation either after or before the +corresponding matching receive is posted. If a matching receive is posted, +this is an expected message, otherwise it is called an unexpected message. +Implementations frequently use different matching schemes for these two +different matching instances. + +To keep MPI library memory footprint down, MPI implementations typically use +two different protocols for this purpose: + +1. The Eager protocol- the complete message is sent when the send is +processed by the sender. A completion send is received in the send_cq +notifying that the buffer can be reused. + +2. The Rendezvous Protocol - the sender sends the tag-matching header, +and perhaps a portion of data when first notifying the receiver. When the +corresponding buffer is posted, the responder will use the information from +the header to initiate an RDMA READ operation directly to the matching buffer. +A fin message needs to be received in order for the buffer to be reused. + +Tag matching implementation + +There are two types of matching objects used, the posted receive list and the +unexpected message list. The application posts receive buffers through calls +to the MPI receive routines in the posted receive list and posts send messages +using the MPI send routines. The head of the posted receive list may be +maintained by the hardware, with the software expected to shadow this list. + +When send is initiated and arrives at the receive side, if there is no +pre-posted receive for this arriving message, it is passed to the software and +placed in the unexpected message list. Otherwise the match is processed, +including rendezvous processing, if appropriate, delivering the data to the +specified receive buffer. This allows overlapping receive-side MPI tag +matching with computation. + +When a receive-message is posted, the communication library will first check +the software unexpected message list for a matching receive. If a match is +found, data is delivered to the user buffer, using a software controlled +protocol. The UCX implementation uses either an eager or rendezvous protocol, +depending on data size. If no match is found, the entire pre-posted receive +list is maintained by the hardware, and there is space to add one more +pre-posted receive to this list, this receive is passed to the hardware. +Software is expected to shadow this list, to help with processing MPI cancel +operations. In addition, because hardware and software are not expected to be +tightly synchronized with respect to the tag-matching operation, this shadow +list is used to detect the case that a pre-posted receive is passed to the +hardware, as the matching unexpected message is being passed from the hardware +to the software. diff --git a/Documentation/input/input.rst b/Documentation/input/input.rst index 3b3a22975106987781eda18172728a3884c59da5..47f86a4bf16cadbf258ccc8078780486c86f4370 100644 --- a/Documentation/input/input.rst +++ b/Documentation/input/input.rst @@ -109,7 +109,7 @@ evdev nodes are created with minors starting with 256. keyboard ~~~~~~~~ -``keyboard`` is in-kernel input handler ad is a part of VT code. It +``keyboard`` is in-kernel input handler and is a part of VT code. It consumes keyboard keystrokes and handles user input for VT consoles. mousedev diff --git a/Documentation/input/joydev/index.rst b/Documentation/input/joydev/index.rst index 8d9666c7561c8bafdfaaa0cb07db36fd85a55daa..ebcff43056e2db641c702348cd42a27364caabe0 100644 --- a/Documentation/input/joydev/index.rst +++ b/Documentation/input/joydev/index.rst @@ -12,7 +12,6 @@ Linux Joystick support .. toctree:: :maxdepth: 3 - :numbered: joystick joystick-api diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 7003141a6d4f2657eea22f41f26ca3e961c33856..329e740adea70db91ac2c9cd9f671f79ca58790e 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -297,9 +297,9 @@ more details, with real examples. ccflags-y specifies options for compiling with $(CC). Example: - # drivers/acpi/Makefile - ccflags-y := -Os - ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT + # drivers/acpi/acpica/Makefile + ccflags-y := -Os -D_LINUX -DBUILDING_ACPICA + ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT This variable is necessary because the top Makefile owns the variable $(KBUILD_CFLAGS) and uses it for compilation flags for the diff --git a/Documentation/locking/rt-mutex-design.txt b/Documentation/locking/rt-mutex-design.txt index 8666070d31896eeb0c54f820189b14d95bd1833d..6c6e8c2410de390dafa0319d694a6df42000ed6b 100644 --- a/Documentation/locking/rt-mutex-design.txt +++ b/Documentation/locking/rt-mutex-design.txt @@ -97,9 +97,9 @@ waiter - A waiter is a struct that is stored on the stack of a blocked a process being blocked on the mutex, it is fine to allocate the waiter on the process's stack (local variable). This structure holds a pointer to the task, as well as the mutex that - the task is blocked on. It also has the plist node structures to - place the task in the waiter_list of a mutex as well as the - pi_list of a mutex owner task (described below). + the task is blocked on. It also has rbtree node structures to + place the task in the waiters rbtree of a mutex as well as the + pi_waiters rbtree of a mutex owner task (described below). waiter is sometimes used in reference to the task that is waiting on a mutex. This is the same as waiter->task. @@ -179,53 +179,34 @@ again. | F->L5-+ +If process G has the highest priority in the chain, then all the tasks up +the chain (A and B in this example), must have their priorities increased +to that of G. -Plist ------ - -Before I go further and talk about how the PI chain is stored through lists -on both mutexes and processes, I'll explain the plist. This is similar to -the struct list_head functionality that is already in the kernel. -The implementation of plist is out of scope for this document, but it is -very important to understand what it does. - -There are a few differences between plist and list, the most important one -being that plist is a priority sorted linked list. This means that the -priorities of the plist are sorted, such that it takes O(1) to retrieve the -highest priority item in the list. Obviously this is useful to store processes -based on their priorities. - -Another difference, which is important for implementation, is that, unlike -list, the head of the list is a different element than the nodes of a list. -So the head of the list is declared as struct plist_head and nodes that will -be added to the list are declared as struct plist_node. - - -Mutex Waiter List +Mutex Waiters Tree ----------------- -Every mutex keeps track of all the waiters that are blocked on itself. The mutex -has a plist to store these waiters by priority. This list is protected by -a spin lock that is located in the struct of the mutex. This lock is called -wait_lock. Since the modification of the waiter list is never done in -interrupt context, the wait_lock can be taken without disabling interrupts. +Every mutex keeps track of all the waiters that are blocked on itself. The +mutex has a rbtree to store these waiters by priority. This tree is protected +by a spin lock that is located in the struct of the mutex. This lock is called +wait_lock. -Task PI List +Task PI Tree ------------ -To keep track of the PI chains, each process has its own PI list. This is -a list of all top waiters of the mutexes that are owned by the process. -Note that this list only holds the top waiters and not all waiters that are +To keep track of the PI chains, each process has its own PI rbtree. This is +a tree of all top waiters of the mutexes that are owned by the process. +Note that this tree only holds the top waiters and not all waiters that are blocked on mutexes owned by the process. -The top of the task's PI list is always the highest priority task that +The top of the task's PI tree is always the highest priority task that is waiting on a mutex that is owned by the task. So if the task has inherited a priority, it will always be the priority of the task that is -at the top of this list. +at the top of this tree. -This list is stored in the task structure of a process as a plist called -pi_list. This list is protected by a spin lock also in the task structure, +This tree is stored in the task structure of a process as a rbtree called +pi_waiters. It is protected by a spin lock also in the task structure, called pi_lock. This lock may also be taken in interrupt context, so when locking the pi_lock, interrupts must be disabled. @@ -312,15 +293,12 @@ Mutex owner and flags The mutex structure contains a pointer to the owner of the mutex. If the mutex is not owned, this owner is set to NULL. Since all architectures -have the task structure on at least a four byte alignment (and if this is -not true, the rtmutex.c code will be broken!), this allows for the two -least significant bits to be used as flags. This part is also described -in Documentation/rt-mutex.txt, but will also be briefly described here. - -Bit 0 is used as the "Pending Owner" flag. This is described later. -Bit 1 is used as the "Has Waiters" flags. This is also described later - in more detail, but is set whenever there are waiters on a mutex. +have the task structure on at least a two byte alignment (and if this is +not true, the rtmutex.c code will be broken!), this allows for the least +significant bit to be used as a flag. Bit 0 is used as the "Has Waiters" +flag. It's set whenever there are waiters on a mutex. +See Documentation/locking/rt-mutex.txt for further details. cmpxchg Tricks -------------- @@ -359,40 +337,31 @@ Priority adjustments -------------------- The implementation of the PI code in rtmutex.c has several places that a -process must adjust its priority. With the help of the pi_list of a +process must adjust its priority. With the help of the pi_waiters of a process this is rather easy to know what needs to be adjusted. -The functions implementing the task adjustments are rt_mutex_adjust_prio, -__rt_mutex_adjust_prio (same as the former, but expects the task pi_lock -to already be taken), rt_mutex_getprio, and rt_mutex_setprio. +The functions implementing the task adjustments are rt_mutex_adjust_prio +and rt_mutex_setprio. rt_mutex_setprio is only used in rt_mutex_adjust_prio. -rt_mutex_getprio and rt_mutex_setprio are only used in __rt_mutex_adjust_prio. +rt_mutex_adjust_prio examines the priority of the task, and the highest +priority process that is waiting any of mutexes owned by the task. Since +the pi_waiters of a task holds an order by priority of all the top waiters +of all the mutexes that the task owns, we simply need to compare the top +pi waiter to its own normal/deadline priority and take the higher one. +Then rt_mutex_setprio is called to adjust the priority of the task to the +new priority. Note that rt_mutex_setprio is defined in kernel/sched/core.c +to implement the actual change in priority. -rt_mutex_getprio returns the priority that the task should have. Either the -task's own normal priority, or if a process of a higher priority is waiting on -a mutex owned by the task, then that higher priority should be returned. -Since the pi_list of a task holds an order by priority list of all the top -waiters of all the mutexes that the task owns, rt_mutex_getprio simply needs -to compare the top pi waiter to its own normal priority, and return the higher -priority back. +(Note: For the "prio" field in task_struct, the lower the number, the + higher the priority. A "prio" of 5 is of higher priority than a + "prio" of 10.) -(Note: if looking at the code, you will notice that the lower number of - prio is returned. This is because the prio field in the task structure - is an inverse order of the actual priority. So a "prio" of 5 is - of higher priority than a "prio" of 10.) - -__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the -result does not equal the task's current priority, then rt_mutex_setprio -is called to adjust the priority of the task to the new priority. -Note that rt_mutex_setprio is defined in kernel/sched/core.c to implement the -actual change in priority. - -It is interesting to note that __rt_mutex_adjust_prio can either increase +It is interesting to note that rt_mutex_adjust_prio can either increase or decrease the priority of the task. In the case that a higher priority -process has just blocked on a mutex owned by the task, __rt_mutex_adjust_prio +process has just blocked on a mutex owned by the task, rt_mutex_adjust_prio would increase/boost the task's priority. But if a higher priority task were for some reason to leave the mutex (timeout or signal), this same function -would decrease/unboost the priority of the task. That is because the pi_list +would decrease/unboost the priority of the task. That is because the pi_waiters always contains the highest priority task that is waiting on a mutex owned by the task, so we only need to compare the priority of that top pi waiter to the normal priority of the given task. @@ -412,9 +381,10 @@ priorities. rt_mutex_adjust_prio_chain is called with a task to be checked for PI (de)boosting (the owner of a mutex that a process is blocking on), a flag to -check for deadlocking, the mutex that the task owns, and a pointer to a waiter +check for deadlocking, the mutex that the task owns, a pointer to a waiter that is the process's waiter struct that is blocked on the mutex (although this -parameter may be NULL for deboosting). +parameter may be NULL for deboosting), a pointer to the mutex on which the task +is blocked, and a top_task as the top waiter of the mutex. For this explanation, I will not mention deadlock detection. This explanation will try to stay at a high level. @@ -424,133 +394,14 @@ that the state of the owner and lock can change when entered into this function. Before this function is called, the task has already had rt_mutex_adjust_prio performed on it. This means that the task is set to the priority that it -should be at, but the plist nodes of the task's waiter have not been updated -with the new priorities, and that this task may not be in the proper locations -in the pi_lists and wait_lists that the task is blocked on. This function +should be at, but the rbtree nodes of the task's waiter have not been updated +with the new priorities, and this task may not be in the proper locations +in the pi_waiters and waiters trees that the task is blocked on. This function solves all that. -A loop is entered, where task is the owner to be checked for PI changes that -was passed by parameter (for the first iteration). The pi_lock of this task is -taken to prevent any more changes to the pi_list of the task. This also -prevents new tasks from completing the blocking on a mutex that is owned by this -task. - -If the task is not blocked on a mutex then the loop is exited. We are at -the top of the PI chain. - -A check is now done to see if the original waiter (the process that is blocked -on the current mutex) is the top pi waiter of the task. That is, is this -waiter on the top of the task's pi_list. If it is not, it either means that -there is another process higher in priority that is blocked on one of the -mutexes that the task owns, or that the waiter has just woken up via a signal -or timeout and has left the PI chain. In either case, the loop is exited, since -we don't need to do any more changes to the priority of the current task, or any -task that owns a mutex that this current task is waiting on. A priority chain -walk is only needed when a new top pi waiter is made to a task. - -The next check sees if the task's waiter plist node has the priority equal to -the priority the task is set at. If they are equal, then we are done with -the loop. Remember that the function started with the priority of the -task adjusted, but the plist nodes that hold the task in other processes -pi_lists have not been adjusted. - -Next, we look at the mutex that the task is blocked on. The mutex's wait_lock -is taken. This is done by a spin_trylock, because the locking order of the -pi_lock and wait_lock goes in the opposite direction. If we fail to grab the -lock, the pi_lock is released, and we restart the loop. - -Now that we have both the pi_lock of the task as well as the wait_lock of -the mutex the task is blocked on, we update the task's waiter's plist node -that is located on the mutex's wait_list. - -Now we release the pi_lock of the task. - -Next the owner of the mutex has its pi_lock taken, so we can update the -task's entry in the owner's pi_list. If the task is the highest priority -process on the mutex's wait_list, then we remove the previous top waiter -from the owner's pi_list, and replace it with the task. - -Note: It is possible that the task was the current top waiter on the mutex, - in which case the task is not yet on the pi_list of the waiter. This - is OK, since plist_del does nothing if the plist node is not on any - list. - -If the task was not the top waiter of the mutex, but it was before we -did the priority updates, that means we are deboosting/lowering the -task. In this case, the task is removed from the pi_list of the owner, -and the new top waiter is added. - -Lastly, we unlock both the pi_lock of the task, as well as the mutex's -wait_lock, and continue the loop again. On the next iteration of the -loop, the previous owner of the mutex will be the task that will be -processed. - -Note: One might think that the owner of this mutex might have changed - since we just grab the mutex's wait_lock. And one could be right. - The important thing to remember is that the owner could not have - become the task that is being processed in the PI chain, since - we have taken that task's pi_lock at the beginning of the loop. - So as long as there is an owner of this mutex that is not the same - process as the tasked being worked on, we are OK. - - Looking closely at the code, one might be confused. The check for the - end of the PI chain is when the task isn't blocked on anything or the - task's waiter structure "task" element is NULL. This check is - protected only by the task's pi_lock. But the code to unlock the mutex - sets the task's waiter structure "task" element to NULL with only - the protection of the mutex's wait_lock, which was not taken yet. - Isn't this a race condition if the task becomes the new owner? - - The answer is No! The trick is the spin_trylock of the mutex's - wait_lock. If we fail that lock, we release the pi_lock of the - task and continue the loop, doing the end of PI chain check again. - - In the code to release the lock, the wait_lock of the mutex is held - the entire time, and it is not let go when we grab the pi_lock of the - new owner of the mutex. So if the switch of a new owner were to happen - after the check for end of the PI chain and the grabbing of the - wait_lock, the unlocking code would spin on the new owner's pi_lock - but never give up the wait_lock. So the PI chain loop is guaranteed to - fail the spin_trylock on the wait_lock, release the pi_lock, and - try again. - - If you don't quite understand the above, that's OK. You don't have to, - unless you really want to make a proof out of it ;) - - -Pending Owners and Lock stealing --------------------------------- - -One of the flags in the owner field of the mutex structure is "Pending Owner". -What this means is that an owner was chosen by the process releasing the -mutex, but that owner has yet to wake up and actually take the mutex. - -Why is this important? Why can't we just give the mutex to another process -and be done with it? - -The PI code is to help with real-time processes, and to let the highest -priority process run as long as possible with little latencies and delays. -If a high priority process owns a mutex that a lower priority process is -blocked on, when the mutex is released it would be given to the lower priority -process. What if the higher priority process wants to take that mutex again. -The high priority process would fail to take that mutex that it just gave up -and it would need to boost the lower priority process to run with full -latency of that critical section (since the low priority process just entered -it). - -There's no reason a high priority process that gives up a mutex should be -penalized if it tries to take that mutex again. If the new owner of the -mutex has not woken up yet, there's no reason that the higher priority process -could not take that mutex away. - -To solve this, we introduced Pending Ownership and Lock Stealing. When a -new process is given a mutex that it was blocked on, it is only given -pending ownership. This means that it's the new owner, unless a higher -priority process comes in and tries to grab that mutex. If a higher priority -process does come along and wants that mutex, we let the higher priority -process "steal" the mutex from the pending owner (only if it is still pending) -and continue with the mutex. - +The main operation of this function is summarized by Thomas Gleixner in +rtmutex.c. See the 'Chain walk basics and protection scope' comment for further +details. Taking of a mutex (The walk through) ------------------------------------ @@ -563,14 +414,14 @@ done when we have CMPXCHG enabled (otherwise the fast taking automatically fails). Only when the owner field of the mutex is NULL can the lock be taken with the CMPXCHG and nothing else needs to be done. -If there is contention on the lock, whether it is owned or pending owner -we go about the slow path (rt_mutex_slowlock). +If there is contention on the lock, we go about the slow path +(rt_mutex_slowlock). The slow path function is where the task's waiter structure is created on the stack. This is because the waiter structure is only needed for the scope of this function. The waiter structure holds the nodes to store -the task on the wait_list of the mutex, and if need be, the pi_list of -the owner. +the task on the waiters tree of the mutex, and if need be, the pi_waiters +tree of the owner. The wait_lock of the mutex is taken since the slow path of unlocking the mutex also takes this lock. @@ -581,102 +432,45 @@ contention). try_to_take_rt_mutex is used every time the task tries to grab a mutex in the slow path. The first thing that is done here is an atomic setting of -the "Has Waiters" flag of the mutex's owner field. Yes, this could really -be false, because if the mutex has no owner, there are no waiters and -the current task also won't have any waiters. But we don't have the lock -yet, so we assume we are going to be a waiter. The reason for this is to -play nice for those architectures that do have CMPXCHG. By setting this flag -now, the owner of the mutex can't release the mutex without going into the -slow unlock path, and it would then need to grab the wait_lock, which this -code currently holds. So setting the "Has Waiters" flag forces the owner -to synchronize with this code. - -Now that we know that we can't have any races with the owner releasing the -mutex, we check to see if we can take the ownership. This is done if the -mutex doesn't have a owner, or if we can steal the mutex from a pending -owner. Let's look at the situations we have here. - - 1) Has owner that is pending - ---------------------------- - - The mutex has a owner, but it hasn't woken up and the mutex flag - "Pending Owner" is set. The first check is to see if the owner isn't the - current task. This is because this function is also used for the pending - owner to grab the mutex. When a pending owner wakes up, it checks to see - if it can take the mutex, and this is done if the owner is already set to - itself. If so, we succeed and leave the function, clearing the "Pending - Owner" bit. - - If the pending owner is not current, we check to see if the current priority is - higher than the pending owner. If not, we fail the function and return. - - There's also something special about a pending owner. That is a pending owner - is never blocked on a mutex. So there is no PI chain to worry about. It also - means that if the mutex doesn't have any waiters, there's no accounting needed - to update the pending owner's pi_list, since we only worry about processes - blocked on the current mutex. - - If there are waiters on this mutex, and we just stole the ownership, we need - to take the top waiter, remove it from the pi_list of the pending owner, and - add it to the current pi_list. Note that at this moment, the pending owner - is no longer on the list of waiters. This is fine, since the pending owner - would add itself back when it realizes that it had the ownership stolen - from itself. When the pending owner tries to grab the mutex, it will fail - in try_to_take_rt_mutex if the owner field points to another process. - - 2) No owner - ----------- - - If there is no owner (or we successfully stole the lock), we set the owner - of the mutex to current, and set the flag of "Has Waiters" if the current - mutex actually has waiters, or we clear the flag if it doesn't. See, it was - OK that we set that flag early, since now it is cleared. - - 3) Failed to grab ownership - --------------------------- - - The most interesting case is when we fail to take ownership. This means that - there exists an owner, or there's a pending owner with equal or higher - priority than the current task. - -We'll continue on the failed case. - -If the mutex has a timeout, we set up a timer to go off to break us out -of this mutex if we failed to get it after a specified amount of time. - -Now we enter a loop that will continue to try to take ownership of the mutex, or -fail from a timeout or signal. - -Once again we try to take the mutex. This will usually fail the first time -in the loop, since it had just failed to get the mutex. But the second time -in the loop, this would likely succeed, since the task would likely be -the pending owner. - -If the mutex is TASK_INTERRUPTIBLE a check for signals and timeout is done -here. - -The waiter structure has a "task" field that points to the task that is blocked -on the mutex. This field can be NULL the first time it goes through the loop -or if the task is a pending owner and had its mutex stolen. If the "task" -field is NULL then we need to set up the accounting for it. +the "Has Waiters" flag of the mutex's owner field. By setting this flag +now, the current owner of the mutex being contended for can't release the mutex +without going into the slow unlock path, and it would then need to grab the +wait_lock, which this code currently holds. So setting the "Has Waiters" flag +forces the current owner to synchronize with this code. + +The lock is taken if the following are true: + 1) The lock has no owner + 2) The current task is the highest priority against all other + waiters of the lock + +If the task succeeds to acquire the lock, then the task is set as the +owner of the lock, and if the lock still has waiters, the top_waiter +(highest priority task waiting on the lock) is added to this task's +pi_waiters tree. + +If the lock is not taken by try_to_take_rt_mutex(), then the +task_blocks_on_rt_mutex() function is called. This will add the task to +the lock's waiter tree and propagate the pi chain of the lock as well +as the lock's owner's pi_waiters tree. This is described in the next +section. Task blocks on mutex -------------------- The accounting of a mutex and process is done with the waiter structure of the process. The "task" field is set to the process, and the "lock" field -to the mutex. The plist nodes are initialized to the processes current -priority. +to the mutex. The rbtree node of waiter are initialized to the processes +current priority. Since the wait_lock was taken at the entry of the slow lock, we can safely -add the waiter to the wait_list. If the current process is the highest -priority process currently waiting on this mutex, then we remove the -previous top waiter process (if it exists) from the pi_list of the owner, -and add the current process to that list. Since the pi_list of the owner +add the waiter to the task waiter tree. If the current process is the +highest priority process currently waiting on this mutex, then we remove the +previous top waiter process (if it exists) from the pi_waiters of the owner, +and add the current process to that tree. Since the pi_waiter of the owner has changed, we call rt_mutex_adjust_prio on the owner to see if the owner should adjust its priority accordingly. -If the owner is also blocked on a lock, and had its pi_list changed +If the owner is also blocked on a lock, and had its pi_waiters changed (or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead and run rt_mutex_adjust_prio_chain on the owner, as described earlier. @@ -686,30 +480,23 @@ mutex (waiter "task" field is not NULL), then we go to sleep (call schedule). Waking up in the loop --------------------- -The schedule can then wake up for a few reasons. - 1) we were given pending ownership of the mutex. - 2) we received a signal and was TASK_INTERRUPTIBLE - 3) we had a timeout and was TASK_INTERRUPTIBLE +The task can then wake up for a couple of reasons: + 1) The previous lock owner released the lock, and the task now is top_waiter + 2) we received a signal or timeout -In any of these cases, we continue the loop and once again try to grab the -ownership of the mutex. If we succeed, we exit the loop, otherwise we continue -and on signal and timeout, will exit the loop, or if we had the mutex stolen -we just simply add ourselves back on the lists and go back to sleep. +In both cases, the task will try again to acquire the lock. If it +does, then it will take itself off the waiters tree and set itself back +to the TASK_RUNNING state. -Note: For various reasons, because of timeout and signals, the steal mutex - algorithm needs to be careful. This is because the current process is - still on the wait_list. And because of dynamic changing of priorities, - especially on SCHED_OTHER tasks, the current process can be the - highest priority task on the wait_list. - -Failed to get mutex on Timeout or Signal ----------------------------------------- +In first case, if the lock was acquired by another task before this task +could get the lock, then it will go back to sleep and wait to be woken again. -If a timeout or signal occurred, the waiter's "task" field would not be -NULL and the task needs to be taken off the wait_list of the mutex and perhaps -pi_list of the owner. If this process was a high priority process, then -the rt_mutex_adjust_prio_chain needs to be executed again on the owner, -but this time it will be lowering the priorities. +The second case is only applicable for tasks that are grabbing a mutex +that can wake up before getting the lock, either due to a signal or +a timeout (i.e. rt_mutex_timed_futex_lock()). When woken, it will try to +take the lock again, if it succeeds, then the task will return with the +lock held, otherwise it will return with -EINTR if the task was woken +by a signal, or -ETIMEDOUT if it timed out. Unlocking the Mutex @@ -739,25 +526,12 @@ owner still needs to make this check. If there are no waiters then the mutex owner field is set to NULL, the wait_lock is released and nothing more is needed. -If there are waiters, then we need to wake one up and give that waiter -pending ownership. +If there are waiters, then we need to wake one up. On the wake up code, the pi_lock of the current owner is taken. The top -waiter of the lock is found and removed from the wait_list of the mutex -as well as the pi_list of the current owner. The task field of the new -pending owner's waiter structure is set to NULL, and the owner field of the -mutex is set to the new owner with the "Pending Owner" bit set, as well -as the "Has Waiters" bit if there still are other processes blocked on the -mutex. - -The pi_lock of the previous owner is released, and the new pending owner's -pi_lock is taken. Remember that this is the trick to prevent the race -condition in rt_mutex_adjust_prio_chain from adding itself as a waiter -on the mutex. - -We now clear the "pi_blocked_on" field of the new pending owner, and if -the mutex still has waiters pending, we add the new top waiter to the pi_list -of the pending owner. +waiter of the lock is found and removed from the waiters tree of the mutex +as well as the pi_waiters tree of the current owner. The "Has Waiters" bit is +marked to prevent lower priority tasks from stealing the lock. Finally we unlock the pi_lock of the pending owner and wake it up. @@ -772,10 +546,14 @@ Credits ------- Author: Steven Rostedt +Updated: Alex Shi - 7/6/2017 -Reviewers: Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and Randy Dunlap +Original Reviewers: Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and + Randy Dunlap +Update (7/6/2017) Reviewers: Steven Rostedt and Sebastian Siewior Updates ------- This document was originally written for 2.6.17-rc3-mm1 +was updated on 4.12 diff --git a/Documentation/locking/rt-mutex.txt b/Documentation/locking/rt-mutex.txt index 243393d882ee7ed5fc70ddb8679afa390c61dead..35793e003041dc7ec568b51bcfc9667c6db0e780 100644 --- a/Documentation/locking/rt-mutex.txt +++ b/Documentation/locking/rt-mutex.txt @@ -28,14 +28,13 @@ magic bullet for poorly designed applications, but it allows well-designed applications to use userspace locks in critical parts of an high priority thread, without losing determinism. -The enqueueing of the waiters into the rtmutex waiter list is done in +The enqueueing of the waiters into the rtmutex waiter tree is done in priority order. For same priorities FIFO order is chosen. For each rtmutex, only the top priority waiter is enqueued into the owner's -priority waiters list. This list too queues in priority order. Whenever +priority waiters tree. This tree too queues in priority order. Whenever the top priority waiter of a task changes (for example it timed out or -got a signal), the priority of the owner task is readjusted. [The -priority enqueueing is handled by "plists", see include/linux/plist.h -for more details.] +got a signal), the priority of the owner task is readjusted. The +priority enqueueing is handled by "pi_waiters". RT-mutexes are optimized for fastpath operations and have no internal locking overhead when locking an uncontended mutex or unlocking a mutex @@ -46,34 +45,29 @@ is used] The state of the rt-mutex is tracked via the owner field of the rt-mutex structure: -rt_mutex->owner holds the task_struct pointer of the owner. Bit 0 and 1 -are used to keep track of the "owner is pending" and "rtmutex has -waiters" state. +lock->owner holds the task_struct pointer of the owner. Bit 0 is used to +keep track of the "lock has waiters" state. - owner bit1 bit0 - NULL 0 0 mutex is free (fast acquire possible) - NULL 0 1 invalid state - NULL 1 0 Transitional state* - NULL 1 1 invalid state - taskpointer 0 0 mutex is held (fast release possible) - taskpointer 0 1 task is pending owner - taskpointer 1 0 mutex is held and has waiters - taskpointer 1 1 task is pending owner and mutex has waiters + owner bit0 + NULL 0 lock is free (fast acquire possible) + NULL 1 lock is free and has waiters and the top waiter + is going to take the lock* + taskpointer 0 lock is held (fast release possible) + taskpointer 1 lock is held and has waiters** -Pending-ownership handling is a performance optimization: -pending-ownership is assigned to the first (highest priority) waiter of -the mutex, when the mutex is released. The thread is woken up and once -it starts executing it can acquire the mutex. Until the mutex is taken -by it (bit 0 is cleared) a competing higher priority thread can "steal" -the mutex which puts the woken up thread back on the waiters list. +The fast atomic compare exchange based acquire and release is only +possible when bit 0 of lock->owner is 0. -The pending-ownership optimization is especially important for the -uninterrupted workflow of high-prio tasks which repeatedly -takes/releases locks that have lower-prio waiters. Without this -optimization the higher-prio thread would ping-pong to the lower-prio -task [because at unlock time we always assign a new owner]. +(*) It also can be a transitional state when grabbing the lock +with ->wait_lock is held. To prevent any fast path cmpxchg to the lock, +we need to set the bit0 before looking at the lock, and the owner may be +NULL in this small time, hence this can be a transitional state. -(*) The "mutex has waiters" bit gets set to take the lock. If the lock -doesn't already have an owner, this bit is quickly cleared if there are -no waiters. So this is a transitional state to synchronize with looking -at the owner field of the mutex and the mutex owner releasing the lock. +(**) There is a small time when bit 0 is set but there are no +waiters. This can happen when grabbing the lock in the slow path. +To prevent a cmpxchg of the owner releasing the lock, we need to +set this bit before looking at the lock. + +BTW, there is still technically a "Pending Owner", it's just not called +that anymore. The pending owner happens to be the top_waiter of a lock +that has no owner and has been woken up to grab the lock. diff --git a/Documentation/media/uapi/cec/cec-funcs.rst b/Documentation/media/uapi/cec/cec-funcs.rst index 5b7630f2e076f5046d2bdbe47ef848f883f26a08..6d696cead5cb58fe5f615960680c5ec44582bc61 100644 --- a/Documentation/media/uapi/cec/cec-funcs.rst +++ b/Documentation/media/uapi/cec/cec-funcs.rst @@ -7,7 +7,6 @@ Function Reference .. toctree:: :maxdepth: 1 - :numbered: cec-func-open cec-func-close diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt index c4114346f054512b28acaf3853a9cbc5768b2849..057e9fdbfac9d93f48b85159e3a5e7954b0a781a 100644 --- a/Documentation/networking/ieee802154.txt +++ b/Documentation/networking/ieee802154.txt @@ -84,17 +84,17 @@ Device drivers API ================== The include/net/mac802154.h defines following functions: - - struct ieee802154_dev *ieee802154_alloc_device - (size_t priv_size, struct ieee802154_ops *ops): - allocation of IEEE 802.15.4 compatible device + - struct ieee802154_hw * + ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops): + allocation of IEEE 802.15.4 compatible hardware device - - void ieee802154_free_device(struct ieee802154_dev *dev): - freeing allocated device + - void ieee802154_free_hw(struct ieee802154_hw *hw): + freeing allocated hardware device - - int ieee802154_register_device(struct ieee802154_dev *dev): - register PHY in the system + - int ieee802154_register_hw(struct ieee802154_hw *hw): + register PHY which is the allocated hardware device, in the system - - void ieee802154_unregister_device(struct ieee802154_dev *dev): + - void ieee802154_unregister_hw(struct ieee802154_hw *hw): freeing registered PHY Moreover IEEE 802.15.4 device operations structure should be filled. diff --git a/Documentation/nvmem/nvmem.txt b/Documentation/nvmem/nvmem.txt index dbd40d879239456b6b17f815b6c1d86263d0c2e5..8d8d8f58f96fbd2420a8c2b00a9988259220700a 100644 --- a/Documentation/nvmem/nvmem.txt +++ b/Documentation/nvmem/nvmem.txt @@ -112,7 +112,7 @@ take nvmem_device as parameter. 5. Releasing a reference to the NVMEM ===================================== -When a consumers no longer needs the NVMEM, it has to release the reference +When a consumer no longer needs the NVMEM, it has to release the reference to the NVMEM it has obtained using the APIs mentioned in the above section. The NVMEM framework provides 2 APIs to release a reference to the NVMEM. diff --git a/Documentation/process/applying-patches.rst b/Documentation/process/applying-patches.rst index a0d058cc6d25a7c843c453e1d93147228b4f075e..dc2ddc3450442b5b8632b87ae21d8c6d43f46d10 100644 --- a/Documentation/process/applying-patches.rst +++ b/Documentation/process/applying-patches.rst @@ -6,9 +6,6 @@ Applying Patches To The Linux Kernel Original by: Jesper Juhl, August 2005 -Last update: - 2016-09-14 - .. note:: This document is obsolete. In most cases, rather than using ``patch`` @@ -344,7 +341,7 @@ possible. This is a good branch to run for people who want to help out testing development kernels but do not want to run some of the really experimental -stuff (such people should see the sections about -git and -mm kernels below). +stuff (such people should see the sections about -next and -mm kernels below). The -rc patches are not incremental, they apply to a base 4.x kernel, just like the 4.x.y patches described above. The kernel version before the -rcN @@ -380,44 +377,6 @@ Here are 3 examples of how to apply these patches:: $ mv linux-4.7.3 linux-4.8-rc5 # rename the kernel source dir -The -git kernels -================ - -These are daily snapshots of Linus' kernel tree (managed in a git -repository, hence the name). - -These patches are usually released daily and represent the current state of -Linus's tree. They are more experimental than -rc kernels since they are -generated automatically without even a cursory glance to see if they are -sane. - --git patches are not incremental and apply either to a base 4.x kernel or -a base 4.x-rc kernel -- you can see which from their name. -A patch named 4.7-git1 applies to the 4.7 kernel source and a patch -named 4.8-rc3-git2 applies to the source of the 4.8-rc3 kernel. - -Here are some examples of how to apply these patches:: - - # moving from 4.7 to 4.7-git1 - - $ cd ~/linux-4.7 # change to the kernel source dir - $ patch -p1 < ../patch-4.7-git1 # apply the 4.7-git1 patch - $ cd .. - $ mv linux-4.7 linux-4.7-git1 # rename the kernel source dir - - # moving from 4.7-git1 to 4.8-rc2-git3 - - $ cd ~/linux-4.7-git1 # change to the kernel source dir - $ patch -p1 -R < ../patch-4.7-git1 # revert the 4.7-git1 patch - # we now have a 4.7 kernel - $ patch -p1 < ../patch-4.8-rc2 # apply the 4.8-rc2 patch - # the kernel is now 4.8-rc2 - $ patch -p1 < ../patch-4.8-rc2-git3 # apply the 4.8-rc2-git3 patch - # the kernel is now 4.8-rc2-git3 - $ cd .. - $ mv linux-4.7-git1 linux-4.8-rc2-git3 # rename source dir - - The -mm patches and the linux-next tree ======================================= diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index adbb50ae52464b2879d4ed7efedfbd41123c85b9..560beaef5a7c83b99b69292943a1eb2732efccee 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -53,7 +53,7 @@ mcelog 0.6 mcelog --version iptables 1.4.2 iptables -V openssl & libcrypto 1.0.0 openssl version bc 1.06.95 bc --version -Sphinx\ [#f1]_ 1.2 sphinx-build --version +Sphinx\ [#f1]_ 1.3 sphinx-build --version ====================== =============== ======================================== .. [#f1] Sphinx is needed only to build the Kernel documentation @@ -309,18 +309,8 @@ Kernel documentation Sphinx ------ -The ReST markups currently used by the Documentation/ files are meant to be -built with ``Sphinx`` version 1.2 or upper. If you're desiring to build -PDF outputs, it is recommended to use version 1.4.6. - -.. note:: - - Please notice that, for PDF and LaTeX output, you'll also need ``XeLaTeX`` - version 3.14159265. Depending on the distribution, you may also need to - install a series of ``texlive`` packages that provide the minimal set of - functionalities required for ``XeLaTex`` to work. For PDF output you'll also - need ``convert(1)`` from ImageMagick (https://www.imagemagick.org). - +Please see :ref:`sphinx_install` in ``Documentation/doc-guide/sphinx.rst`` +for details about Sphinx requirements. Getting updated software ======================== diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 61e9c78bd6d1d999033d11170ea6062337984a7e..36a2dded525b7a9b149c1f5d28d1bc19a46bceca 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -166,12 +166,12 @@ Trees - The queues of patches, for both completed versions and in progress versions can be found at: - http://git.kernel.org/?p=linux/kernel/git/stable/stable-queue.git + https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git - The finalized and tagged releases of all stable kernels can be found in separate branches per version at: - http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git Review committee diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 3e10719fee35d3795ba0baa2d1cf4efa5738b671..733478ade91b50efd098bbfd3cd6831cfa99fcc9 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -413,7 +413,7 @@ e-mail discussions. -11) Sign your work — the Developer's Certificate of Origin +11) Sign your work - the Developer's Certificate of Origin ---------------------------------------------------------- To improve tracking of who did what, especially with patches that can diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst index 1648fa80b3bfddb59a0b6248e4ffbac90bdbb402..1266eeae45f69caeed72902d2507c3131de9a08f 100644 --- a/Documentation/security/keys/core.rst +++ b/Documentation/security/keys/core.rst @@ -16,17 +16,7 @@ The key service can be configured on by enabling: This document has the following sections: - - Key overview - - Key service overview - - Key access permissions - - SELinux support - - New procfs files - - Userspace system call interface - - Kernel services - - Notes on accessing payload contents - - Defining a key type - - Request-key callback service - - Garbage collection +.. contents:: :local: Key Overview @@ -443,7 +433,7 @@ The main syscalls are: /sbin/request-key will be invoked in an attempt to obtain a key. The callout_info string will be passed as an argument to the program. - See also Documentation/security/keys-request-key.txt. + See also Documentation/security/keys/request-key.rst. The keyctl syscall functions are: @@ -973,7 +963,7 @@ payload contents" for more information. If successful, the key will have been attached to the default keyring for implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING. - See also Documentation/security/keys-request-key.txt. + See also Documentation/security/keys/request-key.rst. * To search for a key, passing auxiliary data to the upcaller, call:: diff --git a/Documentation/security/keys/request-key.rst b/Documentation/security/keys/request-key.rst index aba32784174c4e96ce46febb8b60a133a657760e..b2d16abaa9e9c2a416658a4cde83fcb2bc73d8b8 100644 --- a/Documentation/security/keys/request-key.rst +++ b/Documentation/security/keys/request-key.rst @@ -3,7 +3,7 @@ Key Request Service =================== The key request service is part of the key retention service (refer to -Documentation/security/keys.txt). This document explains more fully how +Documentation/security/core.rst). This document explains more fully how the requesting algorithm works. The process starts by either the kernel requesting a service by calling diff --git a/Documentation/security/keys/trusted-encrypted.rst b/Documentation/security/keys/trusted-encrypted.rst index 7b503831bdeaf0dd40e1f04f46e835a8d2fa0680..3bb24e09a33299c7aa2a940f2a999c78859804bb 100644 --- a/Documentation/security/keys/trusted-encrypted.rst +++ b/Documentation/security/keys/trusted-encrypted.rst @@ -172,4 +172,4 @@ Other uses for trusted and encrypted keys, such as for disk and file encryption are anticipated. In particular the new format 'ecryptfs' has been defined in in order to use encrypted keys to mount an eCryptfs filesystem. More details about the usage can be found in the file -``Documentation/security/keys-ecryptfs.txt``. +``Documentation/security/keys/ecryptfs.rst``. diff --git a/Documentation/sphinx-static/theme_overrides.css b/Documentation/sphinx-static/theme_overrides.css index d5764a4de5a2cc7702936d10853f49f633d1d938..522b6d4c49d4cf64e055b9ecf821f1883365f3af 100644 --- a/Documentation/sphinx-static/theme_overrides.css +++ b/Documentation/sphinx-static/theme_overrides.css @@ -4,6 +4,17 @@ * */ +/* Interim: Code-blocks with line nos - lines and line numbers don't line up. + * see: https://github.com/rtfd/sphinx_rtd_theme/issues/419 + */ + +div[class^="highlight"] pre { + line-height: normal; +} +.rst-content .highlight > pre { + line-height: normal; +} + @media screen { /* content column @@ -56,6 +67,12 @@ font-family: "Courier New", Courier, monospace } + /* fix bottom margin of lists items */ + + .rst-content .section ul li:last-child, .rst-content .section ul li p:last-child { + margin-bottom: 12px; + } + /* inline literal: drop the borderbox, padding and red color */ code, .rst-content tt, .rst-content code { diff --git a/Documentation/sphinx/kerneldoc.py b/Documentation/sphinx/kerneldoc.py index d15e07f36881bc5d20dc85332198cb4df8aa3acc..39aa9e8697ccf8f1ad3e0d7210294eff20bd9cb0 100644 --- a/Documentation/sphinx/kerneldoc.py +++ b/Documentation/sphinx/kerneldoc.py @@ -27,6 +27,7 @@ # Please make sure this works on both python2 and python3. # +import codecs import os import subprocess import sys @@ -88,13 +89,10 @@ class KernelDocDirective(Directive): try: env.app.verbose('calling kernel-doc \'%s\'' % (" ".join(cmd))) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() - # python2 needs conversion to unicode. - # python3 with universal_newlines=True returns strings. - if sys.version_info.major < 3: - out, err = unicode(out, 'utf-8'), unicode(err, 'utf-8') + out, err = codecs.decode(out, 'utf-8'), codecs.decode(err, 'utf-8') if p.returncode != 0: sys.stderr.write(err) diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..742be3e126194e27c18633ab5e556c005e5679f2 --- /dev/null +++ b/Documentation/sphinx/requirements.txt @@ -0,0 +1,3 @@ +docutils==0.12 +Sphinx==1.4.9 +sphinx_rtd_theme diff --git a/Documentation/translations/zh_CN/HOWTO b/Documentation/translations/zh_CN/HOWTO index 11be075ba5fa42abe96564e5bc55dcd446ad7726..5f6d09edc9ac9d5ff57ca9716db225d7a57b8602 100644 --- a/Documentation/translations/zh_CN/HOWTO +++ b/Documentation/translations/zh_CN/HOWTO @@ -149,9 +149,7 @@ Linux内核代码中包含有大量的文档。这些文档对于学习如何与 核源码的主目录中使用以下不同命令将会分别生成PDF、Postscript、HTML和手册 页等不同格式的文档: make pdfdocs - make psdocs make htmldocs - make mandocs 如何成为内核开发者 diff --git a/MAINTAINERS b/MAINTAINERS index 1c3feffb1c1cfd2b46685907300a9f026fb67e6a..a2add4a4f7f3ee025f4eb5c0b0bd17443aa77641 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4359,6 +4359,12 @@ S: Maintained F: drivers/gpu/drm/qxl/ F: include/uapi/drm/qxl_drm.h +DRM DRIVER FOR PERVASIVE DISPLAYS REPAPER PANELS +M: Noralf Trønnes +S: Maintained +F: drivers/gpu/drm/tinydrm/repaper.c +F: Documentation/devicetree/bindings/display/repaper.txt + DRM DRIVER FOR RAGE 128 VIDEO CARDS S: Orphan / Obsolete F: drivers/gpu/drm/r128/ @@ -4374,6 +4380,12 @@ S: Orphan / Obsolete F: drivers/gpu/drm/sis/ F: include/uapi/drm/sis_drm.h +DRM DRIVER FOR SITRONIX ST7586 PANELS +M: David Lechner +S: Maintained +F: drivers/gpu/drm/tinydrm/st7586.c +F: Documentation/devicetree/bindings/display/st7586.txt + DRM DRIVER FOR TDFX VIDEO CARDS S: Orphan / Obsolete F: drivers/gpu/drm/tdfx/ @@ -4622,6 +4634,14 @@ F: drivers/gpu/drm/panel/ F: include/drm/drm_panel.h F: Documentation/devicetree/bindings/display/panel/ +DRM TINYDRM DRIVERS +M: Noralf Trønnes +W: https://github.com/notro/tinydrm/wiki/Development +T: git git://anongit.freedesktop.org/drm/drm-misc +S: Maintained +F: drivers/gpu/drm/tinydrm/ +F: include/drm/tinydrm/ + DSBR100 USB FM RADIO DRIVER M: Alexey Klimov L: linux-media@vger.kernel.org @@ -6744,8 +6764,9 @@ S: Supported F: drivers/scsi/isci/ INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets) -M: Daniel Vetter M: Jani Nikula +M: Joonas Lahtinen +M: Rodrigo Vivi L: intel-gfx@lists.freedesktop.org W: https://01.org/linuxgraphics/ B: https://01.org/linuxgraphics/documentation/how-report-bugs diff --git a/Makefile b/Makefile index dda88e744d5f3132ab926c43f512fe49f5a0c3b4..ab067d51ddf15786df7daef7d388e4ce7c8ac3b3 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = NAME = Fearless Coyote # *DOCUMENTATION* @@ -1468,7 +1468,7 @@ $(help-board-dirs): help-%: # Documentation targets # --------------------------------------------------------------------------- -DOC_TARGETS := xmldocs sgmldocs psdocs latexdocs pdfdocs htmldocs mandocs installmandocs epubdocs cleandocs linkcheckdocs +DOC_TARGETS := xmldocs latexdocs pdfdocs htmldocs epubdocs cleandocs linkcheckdocs PHONY += $(DOC_TARGETS) $(DOC_TARGETS): scripts_basic FORCE $(Q)$(MAKE) $(build)=Documentation $@ diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index ff4049155c840c2fc4bc9e8015b5282dadb57469..4d61d2a50c525aab2531e3bf670227cbd5e66bca 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -299,6 +299,7 @@ static inline void __iomem * ioremap_nocache(unsigned long offset, return ioremap(offset, size); } +#define ioremap_wc ioremap_nocache #define ioremap_uc ioremap_nocache static inline void iounmap(volatile void __iomem *addr) diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h index 4cb4b6d3452c0b3439c3aa3c0f928f74de09fb3a..0bc66e1d3a7e9c81f1cca84943f60cd91d85a625 100644 --- a/arch/alpha/include/asm/types.h +++ b/arch/alpha/include/asm/types.h @@ -1,6 +1,6 @@ #ifndef _ALPHA_TYPES_H #define _ALPHA_TYPES_H -#include +#include #endif /* _ALPHA_TYPES_H */ diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index b37153ecf2ac33e19b7df4b061afddf4238f7ebb..db7fc0f511e2ae9cc905a6bae64f9195562c013f 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -3,7 +3,7 @@ #include -#define NR_SYSCALLS 514 +#define NR_SYSCALLS 523 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/arch/alpha/include/uapi/asm/types.h b/arch/alpha/include/uapi/asm/types.h index 9fd3cd459777767e7c80c4c761ee88278f62b17d..8d1024d7be0546bc744adf163e87f9dc723539fe 100644 --- a/arch/alpha/include/uapi/asm/types.h +++ b/arch/alpha/include/uapi/asm/types.h @@ -9,8 +9,18 @@ * need to be careful to avoid a name clashes. */ -#ifndef __KERNEL__ +/* + * This is here because we used to use l64 for alpha + * and we don't want to impact user mode with our change to ll64 + * in the kernel. + * + * However, some user programs are fine with this. They can + * flag __SANE_USERSPACE_TYPES__ to get int-ll64.h here. + */ +#if !defined(__SANE_USERSPACE_TYPES__) && !defined(__KERNEL__) #include +#else +#include #endif #endif /* _UAPI_ALPHA_TYPES_H */ diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h index aa33bf5aacb6c1666203e38700939750c90cb5c5..a2945fea6c868096e6094780bd96384205806a74 100644 --- a/arch/alpha/include/uapi/asm/unistd.h +++ b/arch/alpha/include/uapi/asm/unistd.h @@ -475,5 +475,19 @@ #define __NR_getrandom 511 #define __NR_memfd_create 512 #define __NR_execveat 513 +#define __NR_seccomp 514 +#define __NR_bpf 515 +#define __NR_userfaultfd 516 +#define __NR_membarrier 517 +#define __NR_mlock2 518 +#define __NR_copy_file_range 519 +#define __NR_preadv2 520 +#define __NR_pwritev2 521 +#define __NR_statx 522 + +/* Alpha doesn't have protection keys. */ +#define __IGNORE_pkey_mprotect +#define __IGNORE_pkey_alloc +#define __IGNORE_pkey_free #endif /* _UAPI_ALPHA_UNISTD_H */ diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index d5f0580746a5d632452816b00427c212230b0810..03ff832b1cb4483e7d717def1c4f10ebe6c63619 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -351,7 +351,7 @@ marvel_init_io7(struct io7 *io7) } } -void +void __init marvel_io7_present(gct6_node *node) { int pe; @@ -369,6 +369,7 @@ marvel_io7_present(gct6_node *node) static void __init marvel_find_console_vga_hose(void) { +#ifdef CONFIG_VGA_HOSE u64 *pu64 = (u64 *)((u64)hwrpb + hwrpb->ctbt_offset); if (pu64[7] == 3) { /* TERM_TYPE == graphics */ @@ -402,9 +403,10 @@ marvel_find_console_vga_hose(void) pci_vga_hose = hose; } } +#endif } -gct6_search_struct gct_wanted_node_list[] = { +gct6_search_struct gct_wanted_node_list[] __initdata = { { GCT_TYPE_HOSE, GCT_SUBTYPE_IO_PORT_MODULE, marvel_io7_present }, { 0, 0, NULL } }; diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 219bf271c0ba2e5f2d668af707df57fbbd00ccfd..b532d925443d50b02f3a54481de122c36d948f79 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -461,6 +461,7 @@ titan_ioremap(unsigned long addr, unsigned long size) unsigned long *ptes; unsigned long pfn; +#ifdef CONFIG_VGA_HOSE /* * Adjust the address and hose, if necessary. */ @@ -468,6 +469,7 @@ titan_ioremap(unsigned long addr, unsigned long size) h = pci_vga_hose->index; addr += pci_vga_hose->mem_space->start; } +#endif /* * Find the hose. diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c index 936bc8f89a679f2f8aeb2f7e3cb41f11190a2d77..47632fa8c24e02418cdbac8d5c139f101aa51945 100644 --- a/arch/alpha/kernel/module.c +++ b/arch/alpha/kernel/module.c @@ -181,6 +181,9 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, switch (r_type) { case R_ALPHA_NONE: break; + case R_ALPHA_REFLONG: + *(u32 *)location = value; + break; case R_ALPHA_REFQUAD: /* BUG() can produce misaligned relocations. */ ((u32 *)location)[0] = value; diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 9fc560459ebd64ad3735f9b4dcd5ce6596b7bfee..f6726a74642703381c2151e9d927e267fcba0975 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -115,7 +115,7 @@ wait_boot_cpu_to_stop(int cpuid) /* * Where secondaries begin a life of C. */ -void +void __init smp_callin(void) { int cpuid = hard_smp_processor_id(); diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 9b62e3fd4f038a925657beb15de3de89f8548473..5b4514abb23450998515530ebc824ae1857f27b9 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -532,6 +532,15 @@ sys_call_table: .quad sys_getrandom .quad sys_memfd_create .quad sys_execveat + .quad sys_seccomp + .quad sys_bpf /* 515 */ + .quad sys_userfaultfd + .quad sys_membarrier + .quad sys_mlock2 + .quad sys_copy_file_range + .quad sys_preadv2 /* 520 */ + .quad sys_pwritev2 + .quad sys_statx .size sys_call_table, . - sys_call_table .type sys_call_table, @object diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile index 7083434dd2419b7b3ef6ef8e74c204522d4a3257..a8081596036457af2149b37d356c64f2f1e7acc6 100644 --- a/arch/alpha/lib/Makefile +++ b/arch/alpha/lib/Makefile @@ -20,12 +20,8 @@ lib-y = __divqu.o __remqu.o __divlu.o __remlu.o \ checksum.o \ csum_partial_copy.o \ $(ev67-y)strlen.o \ - $(ev67-y)strcat.o \ - strcpy.o \ - $(ev67-y)strncat.o \ - strncpy.o \ - $(ev6-y)stxcpy.o \ - $(ev6-y)stxncpy.o \ + stycpy.o \ + styncpy.o \ $(ev67-y)strchr.o \ $(ev67-y)strrchr.o \ $(ev6-y)memchr.o \ @@ -49,3 +45,17 @@ AFLAGS___remlu.o = -DREM -DINTSIZE $(addprefix $(obj)/,__divqu.o __remqu.o __divlu.o __remlu.o): \ $(src)/$(ev6-y)divide.S FORCE $(call if_changed_rule,as_o_S) + +# There are direct branches between {str*cpy,str*cat} and stx*cpy. +# Ensure the branches are within range by merging these objects. + +LDFLAGS_stycpy.o := -r +LDFLAGS_styncpy.o := -r + +$(obj)/stycpy.o: $(obj)/strcpy.o $(obj)/$(ev67-y)strcat.o \ + $(obj)/$(ev6-y)stxcpy.o FORCE + $(call if_changed,ld) + +$(obj)/styncpy.o: $(obj)/strncpy.o $(obj)/$(ev67-y)strncat.o \ + $(obj)/$(ev6-y)stxncpy.o FORCE + $(call if_changed,ld) diff --git a/arch/alpha/lib/copy_user.S b/arch/alpha/lib/copy_user.S index 159f1b7e6e495f098a28dc818c7c383ede2dda76..c277a1a4383e5fbb4bbb76b8c2c7802b8bbd6a26 100644 --- a/arch/alpha/lib/copy_user.S +++ b/arch/alpha/lib/copy_user.S @@ -34,7 +34,7 @@ .ent __copy_user __copy_user: .prologue 0 - and $18,$18,$0 + mov $18,$0 and $16,7,$3 beq $0,$35 beq $3,$36 diff --git a/arch/alpha/lib/ev6-copy_user.S b/arch/alpha/lib/ev6-copy_user.S index 35e6710d070054b1c9987d0f5c10f1c29510b1a2..954ca03ebebef371a4e1dd873efce4493de53309 100644 --- a/arch/alpha/lib/ev6-copy_user.S +++ b/arch/alpha/lib/ev6-copy_user.S @@ -45,9 +45,10 @@ # Pipeline info: Slotting & Comments __copy_user: .prologue 0 - andq $18, $18, $0 - subq $18, 32, $1 # .. E .. .. : Is this going to be a small copy? - beq $0, $zerolength # U .. .. .. : U L U L + mov $18, $0 # .. .. .. E + subq $18, 32, $1 # .. .. E. .. : Is this going to be a small copy? + nop # .. E .. .. + beq $18, $zerolength # U .. .. .. : U L U L and $16,7,$3 # .. .. .. E : is leading dest misalignment ble $1, $onebyteloop # .. .. U .. : 1st branch : small amount of data diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index cf90714a676d6baabbdc1beb2119fe530e1c84f3..067ea362fb3efc3bc3a9217aaf197763eafb275e 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -75,13 +75,20 @@ void arc_init_IRQ(void) * Set a default priority for all available interrupts to prevent * switching of register banks if Fast IRQ and multiple register banks * are supported by CPU. - * Also disable all IRQ lines so faulty external hardware won't + * Also disable private-per-core IRQ lines so faulty external HW won't * trigger interrupt that kernel is not ready to handle. */ for (i = NR_EXCEPTIONS; i < irq_bcr.irqs + NR_EXCEPTIONS; i++) { write_aux_reg(AUX_IRQ_SELECT, i); write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO); - write_aux_reg(AUX_IRQ_ENABLE, 0); + + /* + * Only mask cpu private IRQs here. + * "common" interrupts are masked at IDU, otherwise it would + * need to be unmasked at each cpu, with IPIs + */ + if (i < FIRST_EXT_IRQ) + write_aux_reg(AUX_IRQ_ENABLE, 0); } /* setup status32, don't enable intr yet as kernel doesn't want */ diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index cef388025adf43aed091869f102e01e1bee72092..47b421fa0147be9604b1c808d378a0108f735b34 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -27,7 +27,7 @@ */ void arc_init_IRQ(void) { - int level_mask = 0, i; + unsigned int level_mask = 0, i; /* Is timer high priority Interrupt (Level2 in ARCompact jargon) */ level_mask |= IS_ENABLED(CONFIG_ARC_COMPACT_IRQ_LEVELS) << TIMER0_IRQ; diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts index 6713d0f2b3f4d3f3f62231bf6a74bf35828ce8b0..b1502df7b50923a70d996847d1cd258d92cda73c 100644 --- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts +++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts @@ -56,8 +56,6 @@ aliases { serial0 = &uart0; - /* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */ - ethernet0 = &emac; ethernet1 = &xr819; }; @@ -104,13 +102,6 @@ status = "okay"; }; -&emac { - phy-handle = <&int_mii_phy>; - phy-mode = "mii"; - allwinner,leds-active-low; - status = "okay"; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins_a>; diff --git a/arch/arm/boot/dts/sun8i-h3-bananapi-m2-plus.dts b/arch/arm/boot/dts/sun8i-h3-bananapi-m2-plus.dts index d756ff825116029557ec53f022f63e6ab2ed0f25..a337af1de32246b69807f4ee3b65e04e63231967 100644 --- a/arch/arm/boot/dts/sun8i-h3-bananapi-m2-plus.dts +++ b/arch/arm/boot/dts/sun8i-h3-bananapi-m2-plus.dts @@ -52,7 +52,6 @@ compatible = "sinovoip,bpi-m2-plus", "allwinner,sun8i-h3"; aliases { - ethernet0 = &emac; serial0 = &uart0; serial1 = &uart1; }; @@ -115,30 +114,12 @@ status = "okay"; }; -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&emac_rgmii_pins>; - phy-supply = <®_gmac_3v3>; - phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; - - allwinner,leds-active-low; - status = "okay"; -}; - &ir { pinctrl-names = "default"; pinctrl-0 = <&ir_pins_a>; status = "okay"; }; -&mdio { - ext_rgmii_phy: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <0>; - }; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>; diff --git a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts index 78f6c24952dd128249fd3010d212222832bb060a..8d2cc6e9a03faff3cc71965493e5c54c1359e9f3 100644 --- a/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts +++ b/arch/arm/boot/dts/sun8i-h3-nanopi-neo.dts @@ -46,10 +46,3 @@ model = "FriendlyARM NanoPi NEO"; compatible = "friendlyarm,nanopi-neo", "allwinner,sun8i-h3"; }; - -&emac { - phy-handle = <&int_mii_phy>; - phy-mode = "mii"; - allwinner,leds-active-low; - status = "okay"; -}; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts index 17cdeae19c6f0f297d680c5c4f1c09e36071cfda..8ff71b1bb45b1c918d71e686ef4da4de7250306a 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-2.dts @@ -54,7 +54,6 @@ aliases { serial0 = &uart0; /* ethernet0 is the H3 emac, defined in sun8i-h3.dtsi */ - ethernet0 = &emac; ethernet1 = &rtl8189; }; @@ -118,13 +117,6 @@ status = "okay"; }; -&emac { - phy-handle = <&int_mii_phy>; - phy-mode = "mii"; - allwinner,leds-active-low; - status = "okay"; -}; - &ir { pinctrl-names = "default"; pinctrl-0 = <&ir_pins_a>; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts index 6880268e8b87b0d7385e73dc95c23aaa8f25bd9a..5fea430e0eb1006120dd9b98dd904cdd7af14b67 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts @@ -52,7 +52,6 @@ compatible = "xunlong,orangepi-one", "allwinner,sun8i-h3"; aliases { - ethernet0 = &emac; serial0 = &uart0; }; @@ -98,13 +97,6 @@ status = "okay"; }; -&emac { - phy-handle = <&int_mii_phy>; - phy-mode = "mii"; - allwinner,leds-active-low; - status = "okay"; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts index a10281b455f50ccad1f26087ae14884600c19c90..8b93f5c781a70b565ed0012d2c29b35f04987dcd 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts @@ -53,11 +53,6 @@ }; }; -&emac { - /* LEDs changed to active high on the plus */ - /delete-property/ allwinner,leds-active-low; -}; - &mmc1 { pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins_a>; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts index 998b60f8d295e85fe09e184943ffafd45eb1da33..1a044b17d6c61e5cc391782c5e06aa3df7322548 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts @@ -52,7 +52,6 @@ compatible = "xunlong,orangepi-pc", "allwinner,sun8i-h3"; aliases { - ethernet0 = &emac; serial0 = &uart0; }; @@ -114,13 +113,6 @@ status = "okay"; }; -&emac { - phy-handle = <&int_mii_phy>; - phy-mode = "mii"; - allwinner,leds-active-low; - status = "okay"; -}; - &ir { pinctrl-names = "default"; pinctrl-0 = <&ir_pins_a>; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-plus.dts index 331ed683ac62cb861e17971038774bda0209f461..828ae7a526d924955e666a4ad110f565aa931bf5 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-plus.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-plus.dts @@ -47,10 +47,6 @@ model = "Xunlong Orange Pi Plus / Plus 2"; compatible = "xunlong,orangepi-plus", "allwinner,sun8i-h3"; - aliases { - ethernet0 = &emac; - }; - reg_gmac_3v3: gmac-3v3 { compatible = "regulator-fixed"; regulator-name = "gmac-3v3"; @@ -78,24 +74,6 @@ status = "okay"; }; -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&emac_rgmii_pins>; - phy-supply = <®_gmac_3v3>; - phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; - - allwinner,leds-active-low; - status = "okay"; -}; - -&mdio { - ext_rgmii_phy: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <0>; - }; -}; - &mmc2 { pinctrl-names = "default"; pinctrl-0 = <&mmc2_8bit_pins>; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts index 80026f3caafc8928043baf01c3d0387301562292..97920b12a944526f3c5dd15de7435f5e30195a3c 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts @@ -61,19 +61,3 @@ gpio = <&pio 3 6 GPIO_ACTIVE_HIGH>; /* PD6 */ }; }; - -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&emac_rgmii_pins>; - phy-supply = <®_gmac_3v3>; - phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; - status = "okay"; -}; - -&mdio { - ext_rgmii_phy: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; -}; diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi index d38282b9e5d442cbf1709e38f8a64ee9bfacce1c..11240a8313c266a33ec7ad1233aca2a976d4af3f 100644 --- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi +++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi @@ -391,32 +391,6 @@ clocks = <&osc24M>; }; - emac: ethernet@1c30000 { - compatible = "allwinner,sun8i-h3-emac"; - syscon = <&syscon>; - reg = <0x01c30000 0x10000>; - interrupts = ; - interrupt-names = "macirq"; - resets = <&ccu RST_BUS_EMAC>; - reset-names = "stmmaceth"; - clocks = <&ccu CLK_BUS_EMAC>; - clock-names = "stmmaceth"; - #address-cells = <1>; - #size-cells = <0>; - status = "disabled"; - - mdio: mdio { - #address-cells = <1>; - #size-cells = <0>; - int_mii_phy: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - clocks = <&ccu CLK_BUS_EPHY>; - resets = <&ccu RST_BUS_EPHY>; - }; - }; - }; - spi0: spi@01c68000 { compatible = "allwinner,sun8i-h3-spi"; reg = <0x01c68000 0x1000>; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 127e2dd2e21ce12c7daa21e682ce72d36f6b1afd..4a879f6ff13bea92d189eec9370be73cd616705c 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -225,12 +225,6 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -/* We do not have shadow page tables, hence the empty hooks */ -static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address) -{ -} - struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 779fb1f680b339dd9e4fb88826fffb5127d00dd2..b3b3b3a19183507f736669ebc4f6a55302dfb571 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -8,7 +8,7 @@ ccflags-y := -I$(srctree)/$(src)/include \ # Common support obj-y := id.o io.o control.o devices.o fb.o timer.o pm.o \ common.o dma.o wd_timer.o display.o i2c.o hdq1w.o omap_hwmod.o \ - omap_device.o omap-headsmp.o sram.o drm.o + omap_device.o omap-headsmp.o sram.o hwmod-common = omap_hwmod.o omap_hwmod_reset.o \ omap_hwmod_common_data.o diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index b1e661bb5521e4281a487c25cbfea12c0b29f91f..583fc39d84cd1f273f0c09d79318dd2b9a49b87c 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -33,6 +33,7 @@ static void __init __maybe_unused omap_generic_init(void) pdata_quirks_init(omap_dt_match_table); omapdss_init_of(); + omap_soc_device_init(); } #ifdef CONFIG_SOC_OMAP2420 diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 8fa01c0ecdb29ab9b3a9057a23bf610ce3070447..b3f6eb5d04a260226eaa46e17b476e11ecb6e274 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -66,6 +66,7 @@ */ #define FRAMEDONE_IRQ_TIMEOUT 100 +#if defined(CONFIG_FB_OMAP2) static struct platform_device omap_display_device = { .name = "omapdss", .id = -1, @@ -163,6 +164,65 @@ static enum omapdss_version __init omap_display_get_version(void) return OMAPDSS_VER_UNKNOWN; } +static int __init omapdss_init_fbdev(void) +{ + static struct omap_dss_board_info board_data = { + .dsi_enable_pads = omap_dsi_enable_pads, + .dsi_disable_pads = omap_dsi_disable_pads, + .set_min_bus_tput = omap_dss_set_min_bus_tput, + }; + struct device_node *node; + int r; + + board_data.version = omap_display_get_version(); + if (board_data.version == OMAPDSS_VER_UNKNOWN) { + pr_err("DSS not supported on this SoC\n"); + return -ENODEV; + } + + omap_display_device.dev.platform_data = &board_data; + + r = platform_device_register(&omap_display_device); + if (r < 0) { + pr_err("Unable to register omapdss device\n"); + return r; + } + + /* create vrfb device */ + r = omap_init_vrfb(); + if (r < 0) { + pr_err("Unable to register omapvrfb device\n"); + return r; + } + + /* create FB device */ + r = omap_init_fb(); + if (r < 0) { + pr_err("Unable to register omapfb device\n"); + return r; + } + + /* create V4L2 display device */ + r = omap_init_vout(); + if (r < 0) { + pr_err("Unable to register omap_vout device\n"); + return r; + } + + /* add DSI info for omap4 */ + node = of_find_node_by_name(NULL, "omap4_padconf_global"); + if (node) + omap4_dsi_mux_syscon = syscon_node_to_regmap(node); + + return 0; +} +#else +static inline int omapdss_init_fbdev(void) +{ + return 0; +} +#endif /* CONFIG_FB_OMAP2 */ + static void dispc_disable_outputs(void) { u32 v, irq_mask = 0; @@ -335,16 +395,9 @@ static struct device_node * __init omapdss_find_dss_of_node(void) int __init omapdss_init_of(void) { int r; - enum omapdss_version ver; struct device_node *node; struct platform_device *pdev; - static struct omap_dss_board_info board_data = { - .dsi_enable_pads = omap_dsi_enable_pads, - .dsi_disable_pads = omap_dsi_disable_pads, - .set_min_bus_tput = omap_dss_set_min_bus_tput, - }; - /* only create dss helper devices if dss is enabled in the .dts */ node = omapdss_find_dss_of_node(); @@ -354,13 +407,6 @@ int __init omapdss_init_of(void) if (!of_device_is_available(node)) return 0; - ver = omap_display_get_version(); - - if (ver == OMAPDSS_VER_UNKNOWN) { - pr_err("DSS not supported on this SoC\n"); - return -ENODEV; - } - pdev = of_find_device_by_node(node); if (!pdev) { @@ -374,48 +420,5 @@ int __init omapdss_init_of(void) return r; } - board_data.version = ver; - - omap_display_device.dev.platform_data = &board_data; - - r = platform_device_register(&omap_display_device); - if (r < 0) { - pr_err("Unable to register omapdss device\n"); - return r; - } - - /* create DRM device */ - r = omap_init_drm(); - if (r < 0) { - pr_err("Unable to register omapdrm device\n"); - return r; - } - - /* create vrfb device */ - r = omap_init_vrfb(); - if (r < 0) { - pr_err("Unable to register omapvrfb device\n"); - return r; - } - - /* create FB device */ - r = omap_init_fb(); - if (r < 0) { - pr_err("Unable to register omapfb device\n"); - return r; - } - - /* create V4L2 display device */ - r = omap_init_vout(); - if (r < 0) { - pr_err("Unable to register omap_vout device\n"); - return r; - } - - /* add DSI info for omap4 */ - node = of_find_node_by_name(NULL, "omap4_padconf_global"); - if (node) - omap4_dsi_mux_syscon = syscon_node_to_regmap(node); - - return 0; + return omapdss_init_fbdev(); } diff --git a/arch/arm/mach-omap2/display.h b/arch/arm/mach-omap2/display.h index 9a39646d43160c2152a2bea1a5b39ac6b7ede63f..42ec2e99a2f4830193af2e6ceb5415c24698f417 100644 --- a/arch/arm/mach-omap2/display.h +++ b/arch/arm/mach-omap2/display.h @@ -26,7 +26,6 @@ struct omap_dss_dispc_dev_attr { bool has_framedonetv_irq; }; -int omap_init_drm(void); int omap_init_vrfb(void); int omap_init_fb(void); int omap_init_vout(void); diff --git a/arch/arm/mach-omap2/drm.c b/arch/arm/mach-omap2/drm.c deleted file mode 100644 index 44fef961bb70d8baddb9560de32f975753989e26..0000000000000000000000000000000000000000 --- a/arch/arm/mach-omap2/drm.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * DRM/KMS device registration for TI OMAP platforms - * - * Copyright (C) 2012 Texas Instruments - * Author: Rob Clark - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "soc.h" -#include "display.h" - -#if IS_ENABLED(CONFIG_DRM_OMAP) - -static struct omap_drm_platform_data platform_data; - -static struct platform_device omap_drm_device = { - .dev = { - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &platform_data, - }, - .name = "omapdrm", - .id = 0, -}; - -int __init omap_init_drm(void) -{ - platform_data.omaprev = GET_OMAP_TYPE; - - return platform_device_register(&omap_drm_device); - -} -#else -int __init omap_init_drm(void) { return 0; } -#endif diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 1cd20e4d56b06fe2afb4ce7d79bc310446e07762..cb5d7314cf996a3c844c476de84715bdb794328e 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -428,7 +428,6 @@ static void __init __maybe_unused omap_hwmod_init_postsetup(void) static void __init __maybe_unused omap_common_late_init(void) { omap2_common_pm_late_init(); - omap_soc_device_init(); } #ifdef CONFIG_SOC_OMAP2420 diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index ba2fde2909f949531bf4e63e25fe22856b0cec3b..6872135d7f849b1df7e0529b9b95d2b0d9c86478 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -51,7 +51,6 @@ compatible = "sinovoip,bananapi-m64", "allwinner,sun50i-a64"; aliases { - ethernet0 = &emac; serial0 = &uart0; serial1 = &uart1; }; @@ -68,14 +67,6 @@ }; }; -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; - phy-handle = <&ext_rgmii_phy>; - status = "okay"; -}; - &i2c1 { pinctrl-names = "default"; pinctrl-0 = <&i2c1_pins>; @@ -86,13 +77,6 @@ bias-pull-up; }; -&mdio { - ext_rgmii_phy: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts index 24f1aac366d64355f5b6b37bb8e263bcce7f2e2d..f82ccf332c0fac86f2756f5818610d086d1c2cc1 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts @@ -48,18 +48,3 @@ /* TODO: Camera, touchscreen, etc. */ }; - -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; - phy-handle = <&ext_rgmii_phy>; - status = "okay"; -}; - -&mdio { - ext_rgmii_phy: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; -}; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts index 827168bc22ed2c6f9b831e4b3b92d75980becf1d..7c533b6d4ba9abb3c48ae7ce04e1e5fac0e647c3 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts @@ -51,7 +51,6 @@ compatible = "pine64,pine64", "allwinner,sun50i-a64"; aliases { - ethernet0 = &emac; serial0 = &uart0; serial1 = &uart1; serial2 = &uart2; @@ -79,15 +78,6 @@ status = "okay"; }; -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&rmii_pins>; - phy-mode = "rmii"; - phy-handle = <&ext_rmii_phy1>; - status = "okay"; - -}; - &i2c1 { pinctrl-names = "default"; pinctrl-0 = <&i2c1_pins>; @@ -98,13 +88,6 @@ bias-pull-up; }; -&mdio { - ext_rmii_phy1: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index 216e3a5dafaef892fc764a076a0e0e5bdfaf396e..d891a1a27f6c56f7a1d1a9a09fba0d27012fa1a3 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -53,7 +53,6 @@ "allwinner,sun50i-a64"; aliases { - ethernet0 = &emac; serial0 = &uart0; }; @@ -77,21 +76,6 @@ status = "okay"; }; -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; - phy-handle = <&ext_rgmii_phy>; - status = "okay"; -}; - -&mdio { - ext_rgmii_phy: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; -}; - &mmc2 { pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index bd0f33b77f5728781f558414eebf1eb02055a78d..68aadc9b96dc1ea2e7c26e658e0056d4f38ba4f9 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -449,26 +449,6 @@ #size-cells = <0>; }; - emac: ethernet@1c30000 { - compatible = "allwinner,sun50i-a64-emac"; - syscon = <&syscon>; - reg = <0x01c30000 0x10000>; - interrupts = ; - interrupt-names = "macirq"; - resets = <&ccu RST_BUS_EMAC>; - reset-names = "stmmaceth"; - clocks = <&ccu CLK_BUS_EMAC>; - clock-names = "stmmaceth"; - status = "disabled"; - #address-cells = <1>; - #size-cells = <0>; - - mdio: mdio { - #address-cells = <1>; - #size-cells = <0>; - }; - }; - gic: interrupt-controller@1c81000 { compatible = "arm,gic-400"; reg = <0x01c81000 0x1000>, diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts index 9689087611945ff8f7bd3413d1c4b3e39ad14357..1c2387bd5df6f13cae7201e656923704c2878c8e 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts @@ -50,7 +50,6 @@ compatible = "friendlyarm,nanopi-neo2", "allwinner,sun50i-h5"; aliases { - ethernet0 = &emac; serial0 = &uart0; }; @@ -109,22 +108,6 @@ status = "okay"; }; -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&emac_rgmii_pins>; - phy-supply = <®_gmac_3v3>; - phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; - status = "okay"; -}; - -&mdio { - ext_rgmii_phy: ethernet-phy@7 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <7>; - }; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts index a8296feee88411718818ab5452202d8b19b7b921..4f77c8470f6c3377219dd908cc76aa87cb583607 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts @@ -59,7 +59,6 @@ }; aliases { - ethernet0 = &emac; serial0 = &uart0; }; @@ -137,28 +136,12 @@ status = "okay"; }; -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&emac_rgmii_pins>; - phy-supply = <®_gmac_3v3>; - phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; - status = "okay"; -}; - &ir { pinctrl-names = "default"; pinctrl-0 = <&ir_pins_a>; status = "okay"; }; -&mdio { - ext_rgmii_phy: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts index d906b302cbcdc9c6a95a1d95bbe79c3d7b853aef..6be06873e5afe55d39aab3c3564b9851bdb17637 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts @@ -54,7 +54,6 @@ compatible = "xunlong,orangepi-prime", "allwinner,sun50i-h5"; aliases { - ethernet0 = &emac; serial0 = &uart0; }; @@ -144,28 +143,12 @@ status = "okay"; }; -&emac { - pinctrl-names = "default"; - pinctrl-0 = <&emac_rgmii_pins>; - phy-supply = <®_gmac_3v3>; - phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; - status = "okay"; -}; - &ir { pinctrl-names = "default"; pinctrl-0 = <&ir_pins_a>; status = "okay"; }; -&mdio { - ext_rgmii_phy: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; -}; - &mmc0 { pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>; diff --git a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi index e2b0da2c0bc771a5d5f3ef721ee45f58ad466554..105b2938082fb68539e607b003a0980a05184681 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi @@ -280,9 +280,6 @@ &decon { status = "okay"; - - i80-if-timings { - }; }; &decon_tv { @@ -1116,9 +1113,6 @@ &mic { status = "okay"; - - i80-if-timings { - }; }; &pmu_system_controller { diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi index 1eb1f1e9aac4cfb36d2de827ff8c5bd4970a3ad0..4d360713ed12459199ca726f34c32266739865dc 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi @@ -268,10 +268,10 @@ ap_gpio: gpio { compatible = "marvell,armada-8k-gpio"; offset = <0x1040>; - ngpios = <19>; + ngpios = <20>; gpio-controller; #gpio-cells = <2>; - gpio-ranges = <&ap_pinctrl 0 0 19>; + gpio-ranges = <&ap_pinctrl 0 0 20>; }; }; }; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d68630007b14f542865df7e49bc0efacefe9a39d..e923b58606e2bf8e33a76bd4a8a9a4c1336199c6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -326,12 +326,6 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -/* We do not have shadow page tables, hence the empty hooks */ -static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address) -{ -} - struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); diff --git a/arch/c6x/configs/dsk6455_defconfig b/arch/c6x/configs/dsk6455_defconfig index 4663487c67a176472469348c957137ed12c68412..d764ea4cce7f51c739cc65408df3d469b87d66a1 100644 --- a/arch/c6x/configs/dsk6455_defconfig +++ b/arch/c6x/configs/dsk6455_defconfig @@ -1,5 +1,4 @@ CONFIG_SOC_TMS320C6455=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_SPARSE_IRQ=y @@ -25,7 +24,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_BLK_DEV_RAM_SIZE=17000 -CONFIG_MISC_DEVICES=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set diff --git a/arch/c6x/configs/evmc6457_defconfig b/arch/c6x/configs/evmc6457_defconfig index bba40e195ec43d1e70d9c2ccc6a4876f4a96abfd..05d0b4a25ab1fcba6500106cc68a7e546aaeeb78 100644 --- a/arch/c6x/configs/evmc6457_defconfig +++ b/arch/c6x/configs/evmc6457_defconfig @@ -1,5 +1,4 @@ CONFIG_SOC_TMS320C6457=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_SPARSE_IRQ=y @@ -26,7 +25,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_BLK_DEV_RAM_SIZE=17000 -CONFIG_MISC_DEVICES=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set diff --git a/arch/c6x/configs/evmc6472_defconfig b/arch/c6x/configs/evmc6472_defconfig index 8c46155f6d311befd6b2b6d90369149290c5fd4c..8d81fcf86b0e0e86ca9d8bca1a260e9217706b68 100644 --- a/arch/c6x/configs/evmc6472_defconfig +++ b/arch/c6x/configs/evmc6472_defconfig @@ -1,5 +1,4 @@ CONFIG_SOC_TMS320C6472=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_SPARSE_IRQ=y @@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_BLK_DEV_RAM_SIZE=17000 -CONFIG_MISC_DEVICES=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set diff --git a/arch/c6x/configs/evmc6474_defconfig b/arch/c6x/configs/evmc6474_defconfig index 15533f63231315d7282a0442d9ebd48ef35f0dd3..8156a98f3958be3af5d8a407f6dbae8d6f26bfd0 100644 --- a/arch/c6x/configs/evmc6474_defconfig +++ b/arch/c6x/configs/evmc6474_defconfig @@ -1,5 +1,4 @@ CONFIG_SOC_TMS320C6474=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_SPARSE_IRQ=y @@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_BLK_DEV_RAM_SIZE=17000 -CONFIG_MISC_DEVICES=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set diff --git a/arch/c6x/configs/evmc6678_defconfig b/arch/c6x/configs/evmc6678_defconfig index 5f126d4905b1973f502010959592b24a94d71aa5..c4f433c25b69d94618779049f9f38fa5ac872437 100644 --- a/arch/c6x/configs/evmc6678_defconfig +++ b/arch/c6x/configs/evmc6678_defconfig @@ -1,5 +1,4 @@ CONFIG_SOC_TMS320C6678=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_SPARSE_IRQ=y @@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_BLK_DEV_RAM_SIZE=17000 -CONFIG_MISC_DEVICES=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set diff --git a/arch/c6x/platforms/megamod-pic.c b/arch/c6x/platforms/megamod-pic.c index 43afc03e41251d45c2ccd0f0a00656ad5e368fe9..9519fa5f97d0c224b83d65abdacd7495a613de14 100644 --- a/arch/c6x/platforms/megamod-pic.c +++ b/arch/c6x/platforms/megamod-pic.c @@ -208,14 +208,14 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np) pic = kzalloc(sizeof(struct megamod_pic), GFP_KERNEL); if (!pic) { - pr_err("%s: Could not alloc PIC structure.\n", np->full_name); + pr_err("%pOF: Could not alloc PIC structure.\n", np); return NULL; } pic->irqhost = irq_domain_add_linear(np, NR_COMBINERS * 32, &megamod_domain_ops, pic); if (!pic->irqhost) { - pr_err("%s: Could not alloc host.\n", np->full_name); + pr_err("%pOF: Could not alloc host.\n", np); goto error_free; } @@ -225,7 +225,7 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np) pic->regs = of_iomap(np, 0); if (!pic->regs) { - pr_err("%s: Could not map registers.\n", np->full_name); + pr_err("%pOF: Could not map registers.\n", np); goto error_free; } @@ -253,8 +253,8 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np) irq_data = irq_get_irq_data(irq); if (!irq_data) { - pr_err("%s: combiner-%d no irq_data for virq %d!\n", - np->full_name, i, irq); + pr_err("%pOF: combiner-%d no irq_data for virq %d!\n", + np, i, irq); continue; } @@ -265,16 +265,16 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np) * of the core priority interrupts (4 - 15). */ if (hwirq < 4 || hwirq >= NR_PRIORITY_IRQS) { - pr_err("%s: combiner-%d core irq %ld out of range!\n", - np->full_name, i, hwirq); + pr_err("%pOF: combiner-%d core irq %ld out of range!\n", + np, i, hwirq); continue; } /* record the mapping */ mapping[hwirq - 4] = i; - pr_debug("%s: combiner-%d cascading to hwirq %ld\n", - np->full_name, i, hwirq); + pr_debug("%pOF: combiner-%d cascading to hwirq %ld\n", + np, i, hwirq); cascade_data[i].pic = pic; cascade_data[i].index = i; @@ -290,8 +290,8 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np) /* Finally, set up the MUX registers */ for (i = 0; i < NR_MUX_OUTPUTS; i++) { if (mapping[i] != IRQ_UNMAPPED) { - pr_debug("%s: setting mux %d to priority %d\n", - np->full_name, mapping[i], i + 4); + pr_debug("%pOF: setting mux %d to priority %d\n", + np, mapping[i], i + 4); set_megamod_mux(pic, mapping[i], i); } } diff --git a/arch/c6x/platforms/plldata.c b/arch/c6x/platforms/plldata.c index 755359eb628622ffd60d2bdd6ebb3a060159620b..e8b6cc6a7b5ac4e97f20877e05817e209fff7e91 100644 --- a/arch/c6x/platforms/plldata.c +++ b/arch/c6x/platforms/plldata.c @@ -436,8 +436,8 @@ void __init c64x_setup_clocks(void) err = of_property_read_u32(node, "clock-frequency", &val); if (err || val == 0) { - pr_err("%s: no clock-frequency found! Using %dMHz\n", - node->full_name, (int)val / 1000000); + pr_err("%pOF: no clock-frequency found! Using %dMHz\n", + node, (int)val / 1000000); val = 25000000; } clkin1.rate = val; diff --git a/arch/c6x/platforms/timer64.c b/arch/c6x/platforms/timer64.c index 0bd0452ded80d53698dd1673e15be770d89f8ae8..241a9a607193a00a726fcda8da4d2921d9bcbc83 100644 --- a/arch/c6x/platforms/timer64.c +++ b/arch/c6x/platforms/timer64.c @@ -204,14 +204,14 @@ void __init timer64_init(void) timer = of_iomap(np, 0); if (!timer) { - pr_debug("%s: Cannot map timer registers.\n", np->full_name); + pr_debug("%pOF: Cannot map timer registers.\n", np); goto out; } - pr_debug("%s: Timer registers=%p.\n", np->full_name, timer); + pr_debug("%pOF: Timer registers=%p.\n", np, timer); cd->irq = irq_of_parse_and_map(np, 0); if (cd->irq == NO_IRQ) { - pr_debug("%s: Cannot find interrupt.\n", np->full_name); + pr_debug("%pOF: Cannot find interrupt.\n", np); iounmap(timer); goto out; } @@ -229,7 +229,7 @@ void __init timer64_init(void) dscr_set_devstate(timer64_devstate_id, DSCR_DEVSTATE_ENABLED); } - pr_debug("%s: Timer irq=%d.\n", np->full_name, cd->irq); + pr_debug("%pOF: Timer irq=%d.\n", np, cd->irq); clockevents_calc_mult_shift(cd, c6x_core_freq / TIMER_DIVISOR, 5); diff --git a/arch/m32r/include/asm/flat.h b/arch/m32r/include/asm/flat.h index 455ce7ddbf142e9b5878d4259ea1c92d5f7ec05b..dfcb0e4eb25680f643f840009af29f3b3c40af58 100644 --- a/arch/m32r/include/asm/flat.h +++ b/arch/m32r/include/asm/flat.h @@ -95,7 +95,7 @@ static inline unsigned long m32r_flat_get_addr_from_rp (u32 *rp, return ~0; /* bogus value */ } -static inline void flat_put_addr_at_rp(u32 *rp, u32 addr, u32 relval) +static inline int flat_put_addr_at_rp(u32 *rp, u32 addr, u32 relval) { unsigned int reloc = flat_m32r_get_reloc_type (relval); if (reloc & 0xf0) { @@ -133,6 +133,7 @@ static inline void flat_put_addr_at_rp(u32 *rp, u32 addr, u32 relval) break; } } + return 0; } // kludge - text_len is a local variable in the only user. diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h index f23c3d266bae3e4e8eb006deca5bbfeed10cecbe..3d2747d4c9672d8a2178df115e3f75c66d88e443 100644 --- a/arch/microblaze/include/asm/flat.h +++ b/arch/microblaze/include/asm/flat.h @@ -60,7 +60,7 @@ static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, * unaligned. */ -static inline void +static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 relval) { u32 *p = (__force u32 *)rp; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 2998479fd4e83f0ac4c6ccd7d89938c7cc9a6f5f..a9af1d2dcd699114d00a55689c29137cef384841 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -938,11 +938,6 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address) -{ -} - /* Emulation */ int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out); enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 6dd13641a4188e2a977592a3a0ed5e964bca064b..1395654cfc8d833b62ded894af5639cc696367c5 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -872,15 +872,13 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) if (unlikely(test_thread_flag(TIF_SECCOMP))) { int ret, i; struct seccomp_data sd; + unsigned long args[6]; sd.nr = syscall; sd.arch = syscall_get_arch(); - for (i = 0; i < 6; i++) { - unsigned long v, r; - - r = mips_get_syscall_arg(&v, current, regs, i); - sd.args[i] = r ? 0 : v; - } + syscall_get_arguments(current, regs, 0, 6, args); + for (i = 0; i < 6; i++) + sd.args[i] = args[i]; sd.instruction_pointer = KSTK_EIP(current); ret = __secure_computing(&sd); diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 27c2f90eeb21f60e07e6cb4534fb33cdd0b68ef9..a9a7d78803cde30097a02c76aa49bef9f812be7e 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -190,12 +190,6 @@ illegal_syscall: sll t1, t0, 2 beqz v0, einval lw t2, sys_call_table(t1) # syscall routine - sw a0, PT_R2(sp) # call routine directly on restart - - /* Some syscalls like execve get their arguments from struct pt_regs - and claim zero arguments in the syscall table. Thus we have to - assume the worst case and shuffle around all potential arguments. - If you want performance, don't use indirect syscalls. */ move a0, a1 # shift argument registers move a1, a2 @@ -207,11 +201,6 @@ illegal_syscall: sw t4, 16(sp) sw t5, 20(sp) sw t6, 24(sp) - sw a0, PT_R4(sp) # .. and push back a0 - a3, some - sw a1, PT_R5(sp) # syscalls expect them there - sw a2, PT_R6(sp) - sw a3, PT_R7(sp) - sw a3, PT_R26(sp) # update a3 for syscall restarting jr t2 /* Unreached */ diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index c30bc520885f985a921b066bfb45d5dea8aadab2..9ebe3e2403b1d7b84d66732cd261364208f6020d 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -198,7 +198,6 @@ LEAF(sys32_syscall) dsll t1, t0, 3 beqz v0, einval ld t2, sys32_call_table(t1) # syscall routine - sd a0, PT_R2(sp) # call routine directly on restart move a0, a1 # shift argument registers move a1, a2 @@ -207,11 +206,6 @@ LEAF(sys32_syscall) move a4, a5 move a5, a6 move a6, a7 - sd a0, PT_R4(sp) # ... and push back a0 - a3, some - sd a1, PT_R5(sp) # syscalls expect them there - sd a2, PT_R6(sp) - sd a3, PT_R7(sp) - sd a3, PT_R26(sp) # update a3 for syscall restarting jr t2 /* Unreached */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8b3f1238d07f18eda49eb9326934427cf2d83736..e372ed871c513b00e78f69898601c598ee5a026b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -67,11 +67,6 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); -static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address) -{ -} - #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 0c76675394c5930d5cecf2ac01a2718c8e9b6c1f..35bec1c5bd5aa63567be4847c2073be53fa0390c 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -90,6 +90,24 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, /* Mark this context has been used on the new CPU */ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + + /* + * This full barrier orders the store to the cpumask above vs + * a subsequent operation which allows this CPU to begin loading + * translations for next. + * + * When using the radix MMU that operation is the load of the + * MMU context id, which is then moved to SPRN_PID. + * + * For the hash MMU it is either the first load from slb_cache + * in switch_slb(), and/or the store of paca->mm_ctx_id in + * copy_mm_to_paca(). + * + * On the read side the barrier is in pte_xchg(), which orders + * the store to the PTE vs the load of mm_cpumask. + */ + smp_mb(); + new_on_cpu = true; } diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h index 9c0f5db5cf461a92e72185701b4cbc1df168dfcc..67e7e3d990f44ef495ee02b6fcb3ba16053bd5c5 100644 --- a/arch/powerpc/include/asm/pgtable-be-types.h +++ b/arch/powerpc/include/asm/pgtable-be-types.h @@ -87,6 +87,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) unsigned long *p = (unsigned long *)ptep; __be64 prev; + /* See comment in switch_mm_irqs_off() */ prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old), (__force unsigned long)pte_raw(new)); diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index 8bd3b13fe2fb2e8bd1c5762c4e080c9cd921edaa..369a164b545c09087e0740fd6c32ea282a7b5245 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -62,6 +62,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) { unsigned long *p = (unsigned long *)ptep; + /* See comment in switch_mm_irqs_off() */ return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new)); } #endif diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index a160c14304eba22bd83ee98b8f052a2ed8bf7a3f..53766e2bc029e25efc87270f6983e3b7975be31d 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -294,32 +294,26 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce_64 *args) { struct kvmppc_spapr_tce_table *stt = NULL; + struct kvmppc_spapr_tce_table *siter; unsigned long npages, size; int ret = -ENOMEM; int i; + int fd = -1; if (!args->size) return -EINVAL; - /* Check this LIOBN hasn't been previously allocated */ - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { - if (stt->liobn == args->liobn) - return -EBUSY; - } - size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); npages = kvmppc_tce_pages(size); ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); - if (ret) { - stt = NULL; - goto fail; - } + if (ret) + return ret; ret = -ENOMEM; stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), GFP_KERNEL); if (!stt) - goto fail; + goto fail_acct; stt->liobn = args->liobn; stt->page_shift = args->page_shift; @@ -334,24 +328,42 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, goto fail; } - kvm_get_kvm(kvm); + ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDWR | O_CLOEXEC); + if (ret < 0) + goto fail; mutex_lock(&kvm->lock); - list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); + + /* Check this LIOBN hasn't been previously allocated */ + ret = 0; + list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) { + if (siter->liobn == args->liobn) { + ret = -EBUSY; + break; + } + } + + if (!ret) { + list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); + kvm_get_kvm(kvm); + } mutex_unlock(&kvm->lock); - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR | O_CLOEXEC); + if (!ret) + return fd; -fail: - if (stt) { - for (i = 0; i < npages; i++) - if (stt->pages[i]) - __free_page(stt->pages[i]); + put_unused_fd(fd); - kfree(stt); - } + fail: + for (i = 0; i < npages; i++) + if (stt->pages[i]) + __free_page(stt->pages[i]); + + kfree(stt); + fail_acct: + kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); return ret; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c52184a8efdf025c1efffc6dd7310e9374dca630..9c9c983b864f8d64a81eac456adc405cc0252e5d 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1291,6 +1291,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Hypervisor doorbell - exit only if host IPI flag set */ cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL bne 3f +BEGIN_FTR_SECTION + PPC_MSGSYNC +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 beq 4f diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 4636ca6e7d383b7d3ce26b18df01152b8087a8ec..d1ed2c41b5d246dde6d53c34530bc30dc63656b9 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -16,7 +16,22 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) u8 cppr; u16 ack; - /* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */ + /* + * Ensure any previous store to CPPR is ordered vs. + * the subsequent loads from PIPR or ACK. + */ + eieio(); + + /* + * DD1 bug workaround: If PIPR is less favored than CPPR + * ignore the interrupt or we might incorrectly lose an IPB + * bit. + */ + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR); + if (pipr >= xc->hw_cppr) + return; + } /* Perform the acknowledge OS to register cycle. */ ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG)); @@ -235,6 +250,11 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc, /* * If we found an interrupt, adjust what the guest CPPR should * be as if we had just fetched that interrupt from HW. + * + * Note: This can only make xc->cppr smaller as the previous + * loop will only exit with hirq != 0 if prio is lower than + * the current xc->cppr. Thus we don't need to re-check xc->mfrr + * for pending IPIs. */ if (hirq) xc->cppr = prio; @@ -380,6 +400,12 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr) old_cppr = xc->cppr; xc->cppr = cppr; + /* + * Order the above update of xc->cppr with the subsequent + * read of xc->mfrr inside push_pending_to_hw() + */ + smp_mb(); + /* * We are masking less, we need to look for pending things * to deliver and set VP pending bits accordingly to trigger @@ -420,21 +446,37 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr) * used to signal MFRR changes is EOId when fetched from * the queue. */ - if (irq == XICS_IPI || irq == 0) + if (irq == XICS_IPI || irq == 0) { + /* + * This barrier orders the setting of xc->cppr vs. + * subsquent test of xc->mfrr done inside + * scan_interrupts and push_pending_to_hw + */ + smp_mb(); goto bail; + } /* Find interrupt source */ sb = kvmppc_xive_find_source(xive, irq, &src); if (!sb) { pr_devel(" source not found !\n"); rc = H_PARAMETER; + /* Same as above */ + smp_mb(); goto bail; } state = &sb->irq_state[src]; kvmppc_xive_select_irq(state, &hw_num, &xd); state->in_eoi = true; - mb(); + + /* + * This barrier orders both setting of in_eoi above vs, + * subsequent test of guest_priority, and the setting + * of xc->cppr vs. subsquent test of xc->mfrr done inside + * scan_interrupts and push_pending_to_hw + */ + smp_mb(); again: if (state->guest_priority == MASKED) { @@ -461,6 +503,14 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr) } + /* + * This barrier orders the above guest_priority check + * and spin_lock/unlock with clearing in_eoi below. + * + * It also has to be a full mb() as it must ensure + * the MMIOs done in source_eoi() are completed before + * state->in_eoi is visible. + */ mb(); state->in_eoi = false; bail: @@ -495,6 +545,18 @@ X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server, /* Locklessly write over MFRR */ xc->mfrr = mfrr; + /* + * The load of xc->cppr below and the subsequent MMIO store + * to the IPI must happen after the above mfrr update is + * globally visible so that: + * + * - Synchronize with another CPU doing an H_EOI or a H_CPPR + * updating xc->cppr then reading xc->mfrr. + * + * - The target of the IPI sees the xc->mfrr update + */ + mb(); + /* Shoot the IPI if most favored than target cppr */ if (mfrr < xc->cppr) __x_writeq(0, __x_trig_page(&xc->vp_ipi_data)); diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index b5d960d6db3d0b18d33273b31ac67e03caebd02b..4c7b8591f7379931a1d319c8edd5995610ca6d8c 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -614,15 +614,6 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, mmio_invalidate(npu_context, 1, address, true); } -static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address) -{ - struct npu_context *npu_context = mn_to_npu_context(mn); - - mmio_invalidate(npu_context, 1, address, true); -} - static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) @@ -640,7 +631,6 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { .release = pnv_npu2_mn_release, .change_pte = pnv_npu2_mn_change_pte, - .invalidate_page = pnv_npu2_mn_invalidate_page, .invalidate_range = pnv_npu2_mn_invalidate_range, }; diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index b9300f8aee10a5754909e63946c6f06d0bbc74f1..07a82bc933a775df11a9d2084bcd0b4eeb768311 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -8,11 +8,12 @@ #include #include -#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64)) +#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \ + typeof(0?(__force t)0:0ULL), u64)) #define __SC_DELOUSE(t,v) ({ \ BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \ - (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \ + (__force t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \ }) #define PSW32_MASK_PER 0x40000000UL diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 4541ac44b35f0e39b831b3eba2e5669d29dd9f89..24bc41622a983cf353c6d414d521cbd94cd43cbb 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -44,6 +44,11 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | _ASCE_TYPE_REGION3; break; + case -PAGE_SIZE: + /* forked 5-level task, set new asce with new_mm->pgd */ + mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION1; + break; case 1UL << 53: /* forked 4-level task, set new asce with new mm->pgd */ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 926b5244263efd3dd1fadf637ce4790251367e60..a2e5c24f47a7471a1e7730f654b8ce4599434ef0 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -394,7 +394,7 @@ static int sthyi(u64 vaddr) "srl %[cc],28\n" : [cc] "=d" (cc) : [code] "d" (code), [addr] "a" (addr) - : "memory", "cc"); + : "3", "memory", "cc"); return cc; } @@ -425,7 +425,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr); trace_kvm_s390_handle_sthyi(vcpu, code, addr); - if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK) + if (reg1 == reg2 || reg1 & 1 || reg2 & 1) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); if (code & 0xffff) { @@ -433,6 +433,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu) goto out; } + if (addr & ~PAGE_MASK) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + /* * If the page has not yet been faulted in, we want to do that * now and not after all the expensive calculations. diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 2e10d2b8ad359607981ce381e27c3bf22e2e184d..5bea139517a2edc21dc50074d2c2e9a94dabb19e 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -119,7 +119,8 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, return addr; check_asce_limit: - if (addr + len > current->mm->context.asce_limit) { + if (addr + len > current->mm->context.asce_limit && + addr + len <= TASK_SIZE) { rc = crst_table_upgrade(mm, addr + len); if (rc) return (unsigned long) rc; @@ -183,7 +184,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, } check_asce_limit: - if (addr + len > current->mm->context.asce_limit) { + if (addr + len > current->mm->context.asce_limit && + addr + len <= TASK_SIZE) { rc = crst_table_upgrade(mm, addr + len); if (rc) return (unsigned long) rc; diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index a0838ab929f2276de8ed08e2e0430f4e3e7943f8..c14217cd0155f09ab7f00e018ec25fe3b4c4cf5e 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -116,8 +116,7 @@ void __putstr(const char *s) } } - if (boot_params->screen_info.orig_video_mode == 0 && - lines == 0 && cols == 0) + if (lines == 0 || cols == 0) return; x = boot_params->screen_info.orig_x; diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 2ed8f0c25def4b01dd50107425c5b6e54d60f2eb..1bb08ecffd24a61e1d9bccf2094e70996f280bce 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -520,8 +520,14 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr # the description in lib/decompressor_xxx.c for specific information. # # extra_bytes = (uncompressed_size >> 12) + 65536 + 128 +# +# LZ4 is even worse: data that cannot be further compressed grows by 0.4%, +# or one byte per 256 bytes. OTOH, we can safely get rid of the +128 as +# the size-dependent part now grows so fast. +# +# extra_bytes = (uncompressed_size >> 8) + 65536 -#define ZO_z_extra_bytes ((ZO_z_output_len >> 12) + 65536 + 128) +#define ZO_z_extra_bytes ((ZO_z_output_len >> 8) + 65536) #if ZO_z_output_len > ZO_z_input_len # define ZO_z_extract_offset (ZO_z_output_len + ZO_z_extra_bytes - \ ZO_z_input_len) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index af12e294caeda5f518cf8b80ce5765996f1d8c56..939050169d122c9d129219be27c09e6a9a9ef443 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2335,12 +2335,9 @@ static unsigned long get_segment_base(unsigned int segment) #ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; - if (idx > LDT_ENTRIES) - return 0; - /* IRQs are off, so this synchronizes with smp_store_release */ ldt = lockless_dereference(current->active_mm->context.ldt); - if (!ldt || idx > ldt->nr_entries) + if (!ldt || idx >= ldt->nr_entries) return 0; desc = &ldt->entries[idx]; @@ -2348,7 +2345,7 @@ static unsigned long get_segment_base(unsigned int segment) return 0; #endif } else { - if (idx > GDT_ENTRIES) + if (idx >= GDT_ENTRIES) return 0; desc = raw_cpu_ptr(gdt_page.gdt) + idx; diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 255645f60ca2b4be333de67a10c6780f364e6fb2..554cdb205d17586887e6b599c991b2fc090ba38a 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -450,10 +450,10 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) return 0; } -static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate) +static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) { if (use_xsave()) { - copy_kernel_to_xregs(&fpstate->xsave, -1); + copy_kernel_to_xregs(&fpstate->xsave, mask); } else { if (use_fxsr()) copy_kernel_to_fxregs(&fpstate->fxsave); @@ -477,7 +477,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) : : [addr] "m" (fpstate)); } - __copy_kernel_to_fpregs(fpstate); + __copy_kernel_to_fpregs(fpstate, -1); } extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 87ac4fba6d8e12f07e8a9f191bdb028a1c3e6234..92c9032502d87b3291268f2c98b04ec4cb59854d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -492,6 +492,7 @@ struct kvm_vcpu_arch { unsigned long cr4; unsigned long cr4_guest_owned_bits; unsigned long cr8; + u32 pkru; u32 hflags; u64 efer; u64 apic_base; @@ -1374,8 +1375,6 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); -void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address); void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 265c907d7d4c9b8c69e24792a20c5a3dfb6c95ee..7a234be7e29846618cbdafb0153705cb8a8566b3 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -140,9 +140,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif - init_new_context_ldt(tsk, mm); - - return 0; + return init_new_context_ldt(tsk, mm); } static inline void destroy_context(struct mm_struct *mm) { diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index d907c3d8633fdb289ea03397c2e76efc284517aa..a4516ca4c4f3c684a3b150435c0e851d60aa3b48 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -527,6 +527,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_BXT_IDS(&gen9_early_ops), INTEL_KBL_IDS(&gen9_early_ops), INTEL_GLK_IDS(&gen9_early_ops), + INTEL_CNL_IDS(&gen9_early_ops), }; static void __init diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 59ca2eea522c466937287c3e660b8f38e49c12aa..19adbb4184439dd6c40c39cd6dec2afe5a9ae83a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -469,7 +469,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; cpuid_mask(&entry->ecx, CPUID_7_ECX); /* PKU is not yet implemented for shadow paging. */ - if (!tdp_enabled) + if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX); diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 762cdf2595f992fd4ac8bb1e4c2c8914b344db04..e1e89ee4af750dc51f78cfbf8aa71e22d77b4cd1 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -84,11 +84,6 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); } -static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu) -{ - return kvm_x86_ops->get_pkru(vcpu); -} - static inline void enter_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags |= HF_GUEST_MASK; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index d7d248a000dd6772681f3f5541e344f9677a2d1d..4b9a3ae6b725d37bdeeb5aeb24c0a9c2716228f4 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -185,7 +185,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, * index of the protection domain, so pte_pkey * 2 is * is the index of the first bit for the domain. */ - pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3; + pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3; /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */ offset = (pfec & ~1) + diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 56ba05312759d3ed4568e546492d9d8bfad05b71..af256b786a70cccd14906fe926e2b790156967a4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1777,11 +1777,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_svm(vcpu)->vmcb->save.rflags = rflags; } -static u32 svm_get_pkru(struct kvm_vcpu *vcpu) -{ - return 0; -} - static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { switch (reg) { @@ -5413,8 +5408,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, - .get_pkru = svm_get_pkru, - .tlb_flush = svm_flush_tlb, .run = svm_vcpu_run, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9b21b12230354e334900e6536b7612285f75b7e3..c6ef2940119bfdfb00f0547c321697280ebae0fa 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -636,8 +636,6 @@ struct vcpu_vmx { u64 current_tsc_ratio; - bool guest_pkru_valid; - u32 guest_pkru; u32 host_pkru; /* @@ -2383,11 +2381,6 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_vmx(vcpu)->emulation_required = emulation_required(vcpu); } -static u32 vmx_get_pkru(struct kvm_vcpu *vcpu) -{ - return to_vmx(vcpu)->guest_pkru; -} - static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) { u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); @@ -9020,8 +9013,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vmx_set_interrupt_shadow(vcpu, 0); - if (vmx->guest_pkru_valid) - __write_pkru(vmx->guest_pkru); + if (static_cpu_has(X86_FEATURE_PKU) && + kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && + vcpu->arch.pkru != vmx->host_pkru) + __write_pkru(vcpu->arch.pkru); atomic_switch_perf_msrs(vmx); debugctlmsr = get_debugctlmsr(); @@ -9169,13 +9164,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * back on host, so it is safe to read guest PKRU from current * XSAVE. */ - if (boot_cpu_has(X86_FEATURE_OSPKE)) { - vmx->guest_pkru = __read_pkru(); - if (vmx->guest_pkru != vmx->host_pkru) { - vmx->guest_pkru_valid = true; + if (static_cpu_has(X86_FEATURE_PKU) && + kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { + vcpu->arch.pkru = __read_pkru(); + if (vcpu->arch.pkru != vmx->host_pkru) __write_pkru(vmx->host_pkru); - } else - vmx->guest_pkru_valid = false; } /* @@ -11682,8 +11675,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, - .get_pkru = vmx_get_pkru, - .tlb_flush = vmx_flush_tlb, .run = vmx_vcpu_run, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d734aa8c5b4f7290e365badd00ea962fd0af9acd..272320eb328c9bc8c2d0cf839a097a30ca27491f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3245,7 +3245,12 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) u32 size, offset, ecx, edx; cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx); - memcpy(dest + offset, src, size); + if (feature == XFEATURE_MASK_PKRU) + memcpy(dest + offset, &vcpu->arch.pkru, + sizeof(vcpu->arch.pkru)); + else + memcpy(dest + offset, src, size); + } valid -= feature; @@ -3283,7 +3288,11 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) u32 size, offset, ecx, edx; cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx); - memcpy(dest, src + offset, size); + if (feature == XFEATURE_MASK_PKRU) + memcpy(&vcpu->arch.pkru, src + offset, + sizeof(vcpu->arch.pkru)); + else + memcpy(dest, src + offset, size); } valid -= feature; @@ -6725,17 +6734,6 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); -void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, - unsigned long address) -{ - /* - * The physical address of apic access page is stored in the VMCS. - * Update it when it becomes invalid. - */ - if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT)) - kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); -} - /* * Returns 1 to let vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the @@ -7633,7 +7631,9 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) */ vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); - __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); + /* PKRU is separately restored in kvm_x86_ops->run. */ + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, + ~XFEATURE_MASK_PKRU); trace_kvm_fpu(1); } diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c index ae4cd58c0c7a403c8ba40f978e5d4221ce94bfc6..02250b2633b839c7fa56704d804dd4953f244121 100644 --- a/arch/x86/um/user-offsets.c +++ b/arch/x86/um/user-offsets.c @@ -50,7 +50,7 @@ void foo(void) DEFINE(HOST_GS, GS); DEFINE(HOST_ORIG_AX, ORIG_EAX); #else -#if defined(PTRACE_GETREGSET) && defined(PTRACE_SETREGSET) +#ifdef FP_XSTATE_MAGIC1 DEFINE(HOST_FP_SIZE, sizeof(struct _xstate) / sizeof(unsigned long)); #else DEFINE(HOST_FP_SIZE, sizeof(struct _fpstate) / sizeof(unsigned long)); diff --git a/block/Kconfig b/block/Kconfig index 89cd28f8d05129680aa531dd462fc013966c9654..3ab42bbb06d58fdb9d0dd679b160a6d1d27490af 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -206,4 +206,9 @@ config BLK_MQ_VIRTIO depends on BLOCK && VIRTIO default y +config BLK_MQ_RDMA + bool + depends on BLOCK && INFINIBAND + default y + source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index 2b281cf258a0abdae62bf92c36955422a0b58b1d..9396ebc85d24516a1e22ebedfb7a70625ffaa8c3 100644 --- a/block/Makefile +++ b/block/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o +obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 9ebc2945f991e9ff5d50c5191de45ca1025f1201..4f927a58dff86183a95baf9c7f4eb474c218f0f2 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -75,6 +75,8 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(STATS), QUEUE_FLAG_NAME(POLL_STATS), QUEUE_FLAG_NAME(REGISTERED), + QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), + QUEUE_FLAG_NAME(QUIESCED), }; #undef QUEUE_FLAG_NAME @@ -265,6 +267,7 @@ static const char *const cmd_flag_name[] = { CMD_FLAG_NAME(RAHEAD), CMD_FLAG_NAME(BACKGROUND), CMD_FLAG_NAME(NOUNMAP), + CMD_FLAG_NAME(NOWAIT), }; #undef CMD_FLAG_NAME diff --git a/block/blk-mq-rdma.c b/block/blk-mq-rdma.c new file mode 100644 index 0000000000000000000000000000000000000000..996167f1de18dbd16696520cfdad96fc12500ae5 --- /dev/null +++ b/block/blk-mq-rdma.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 Sagi Grimberg. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include +#include +#include + +/** + * blk_mq_rdma_map_queues - provide a default queue mapping for rdma device + * @set: tagset to provide the mapping for + * @dev: rdma device associated with @set. + * @first_vec: first interrupt vectors to use for queues (usually 0) + * + * This function assumes the rdma device @dev has at least as many available + * interrupt vetors as @set has queues. It will then query it's affinity mask + * and built queue mapping that maps a queue to the CPUs that have irq affinity + * for the corresponding vector. + * + * In case either the driver passed a @dev with less vectors than + * @set->nr_hw_queues, or @dev does not provide an affinity mask for a + * vector, we fallback to the naive mapping. + */ +int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set, + struct ib_device *dev, int first_vec) +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + for (queue = 0; queue < set->nr_hw_queues; queue++) { + mask = ib_get_vector_affinity(dev, first_vec + queue); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + set->mq_map[cpu] = queue; + } + + return 0; + +fallback: + return blk_mq_map_queues(set); +} +EXPORT_SYMBOL_GPL(blk_mq_rdma_map_queues); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a7285bf2831c7bdbb89b753fccb198f8640e8780..80f5481fe9f6c1603e477349732dac62c3baf204 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -382,6 +382,14 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) } \ } while (0) +static inline unsigned int throtl_bio_data_size(struct bio *bio) +{ + /* assume it's one sector */ + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) + return 512; + return bio->bi_iter.bi_size; +} + static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg) { INIT_LIST_HEAD(&qn->node); @@ -934,6 +942,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, bool rw = bio_data_dir(bio); u64 bytes_allowed, extra_bytes, tmp; unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; + unsigned int bio_size = throtl_bio_data_size(bio); jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; @@ -947,14 +956,14 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, do_div(tmp, HZ); bytes_allowed = tmp; - if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) { + if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) { if (wait) *wait = 0; return true; } /* Calc approx time to dispatch */ - extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed; + extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed; jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw)); if (!jiffy_wait) @@ -1034,11 +1043,12 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) { bool rw = bio_data_dir(bio); + unsigned int bio_size = throtl_bio_data_size(bio); /* Charge the bio to the group */ - tg->bytes_disp[rw] += bio->bi_iter.bi_size; + tg->bytes_disp[rw] += bio_size; tg->io_disp[rw]++; - tg->last_bytes_disp[rw] += bio->bi_iter.bi_size; + tg->last_bytes_disp[rw] += bio_size; tg->last_io_disp[rw]++; /* diff --git a/block/bsg-lib.c b/block/bsg-lib.c index c4513b23f57a6438af6ae38367c072931edf138c..dd56d7460cb91d504aa22ae9215d00f97c844359 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -29,26 +29,25 @@ #include /** - * bsg_destroy_job - routine to teardown/delete a bsg job + * bsg_teardown_job - routine to teardown a bsg job * @job: bsg_job that is to be torn down */ -static void bsg_destroy_job(struct kref *kref) +static void bsg_teardown_job(struct kref *kref) { struct bsg_job *job = container_of(kref, struct bsg_job, kref); struct request *rq = job->req; - blk_end_request_all(rq, BLK_STS_OK); - put_device(job->dev); /* release reference for the request */ kfree(job->request_payload.sg_list); kfree(job->reply_payload.sg_list); - kfree(job); + + blk_end_request_all(rq, BLK_STS_OK); } void bsg_job_put(struct bsg_job *job) { - kref_put(&job->kref, bsg_destroy_job); + kref_put(&job->kref, bsg_teardown_job); } EXPORT_SYMBOL_GPL(bsg_job_put); @@ -100,7 +99,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done); */ static void bsg_softirq_done(struct request *rq) { - struct bsg_job *job = rq->special; + struct bsg_job *job = blk_mq_rq_to_pdu(rq); bsg_job_put(job); } @@ -122,33 +121,20 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) } /** - * bsg_create_job - create the bsg_job structure for the bsg request + * bsg_prepare_job - create the bsg_job structure for the bsg request * @dev: device that is being sent the bsg request * @req: BSG request that needs a job structure */ -static int bsg_create_job(struct device *dev, struct request *req) +static int bsg_prepare_job(struct device *dev, struct request *req) { struct request *rsp = req->next_rq; - struct request_queue *q = req->q; struct scsi_request *rq = scsi_req(req); - struct bsg_job *job; + struct bsg_job *job = blk_mq_rq_to_pdu(req); int ret; - BUG_ON(req->special); - - job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL); - if (!job) - return -ENOMEM; - - req->special = job; - job->req = req; - if (q->bsg_job_size) - job->dd_data = (void *)&job[1]; job->request = rq->cmd; job->request_len = rq->cmd_len; - job->reply = rq->sense; - job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer - * allocated */ + if (req->bio) { ret = bsg_map_buffer(&job->request_payload, req); if (ret) @@ -187,7 +173,6 @@ static void bsg_request_fn(struct request_queue *q) { struct device *dev = q->queuedata; struct request *req; - struct bsg_job *job; int ret; if (!get_device(dev)) @@ -199,7 +184,7 @@ static void bsg_request_fn(struct request_queue *q) break; spin_unlock_irq(q->queue_lock); - ret = bsg_create_job(dev, req); + ret = bsg_prepare_job(dev, req); if (ret) { scsi_req(req)->result = ret; blk_end_request_all(req, BLK_STS_OK); @@ -207,8 +192,7 @@ static void bsg_request_fn(struct request_queue *q) continue; } - job = req->special; - ret = q->bsg_job_fn(job); + ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req)); spin_lock_irq(q->queue_lock); if (ret) break; @@ -219,6 +203,35 @@ static void bsg_request_fn(struct request_queue *q) spin_lock_irq(q->queue_lock); } +static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + + memset(job, 0, sizeof(*job)); + + scsi_req_init(sreq); + sreq->sense_len = SCSI_SENSE_BUFFERSIZE; + sreq->sense = kzalloc(sreq->sense_len, gfp); + if (!sreq->sense) + return -ENOMEM; + + job->req = req; + job->reply = sreq->sense; + job->reply_len = sreq->sense_len; + job->dd_data = job + 1; + + return 0; +} + +static void bsg_exit_rq(struct request_queue *q, struct request *req) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + + kfree(sreq->sense); +} + /** * bsg_setup_queue - Create and add the bsg hooks so we can receive requests * @dev: device to attach bsg device to @@ -235,7 +248,9 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, q = blk_alloc_queue(GFP_KERNEL); if (!q) return ERR_PTR(-ENOMEM); - q->cmd_size = sizeof(struct scsi_request); + q->cmd_size = sizeof(struct bsg_job) + dd_job_size; + q->init_rq_fn = bsg_init_rq; + q->exit_rq_fn = bsg_exit_rq; q->request_fn = bsg_request_fn; ret = blk_init_allocated_queue(q); @@ -243,7 +258,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, goto out_cleanup_queue; q->queuedata = dev; - q->bsg_job_size = dd_job_size; q->bsg_job_fn = job_fn; queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 38554c2ea38adc28f0835b35f65cdb2c94451656..abaf9d78a2061d8d307a3f05858afbc8445c15d7 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -79,7 +79,7 @@ static int compat_hdio_getgeo(struct gendisk *disk, struct block_device *bdev, static int compat_hdio_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - unsigned long *__user p; + unsigned long __user *p; int error; p = compat_alloc_user_space(sizeof(unsigned long)); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 43839b00fe6c42fff5f8afbc04ffdff4e61a147d..903605dbc1a50282e8e3d7ced0bba148ec7ebe9c 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -87,8 +87,13 @@ static void skcipher_free_async_sgls(struct skcipher_async_req *sreq) } sgl = sreq->tsg; n = sg_nents(sgl); - for_each_sg(sgl, sg, n, i) - put_page(sg_page(sg)); + for_each_sg(sgl, sg, n, i) { + struct page *page = sg_page(sg); + + /* some SGs may not have a page mapped */ + if (page && page_ref_count(page)) + put_page(page); + } kfree(sreq->tsg); } diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c index 8b3c04d625c3bb0ee667a8a61a0c1e4499b0b486..4a45fa4890c0e45eb74124d39b9fee35823ac4dd 100644 --- a/crypto/chacha20_generic.c +++ b/crypto/chacha20_generic.c @@ -91,9 +91,14 @@ int crypto_chacha20_crypt(struct skcipher_request *req) crypto_chacha20_init(state, ctx, walk.iv); while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr, - walk.nbytes); - err = skcipher_walk_done(&walk, 0); + nbytes); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } return err; diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 6ceb0e2758bbccd4f0542a5f9628e9f3e987ce7c..d54971d2d1c8694805bb8618b8673c8490f9e8bb 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -32675,6 +32675,10 @@ static const struct cipher_testvec chacha20_enc_tv_template[] = { "\x5b\x86\x2f\x37\x30\xe3\x7c\xfd" "\xc4\xfd\x80\x6c\x22\xf2\x21", .rlen = 375, + .also_non_np = 1, + .np = 3, + .tap = { 375 - 20, 4, 16 }, + }, { /* RFC7539 A.2. Test Vector #3 */ .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a" "\xf3\x33\x88\x86\x04\xf6\xb5\xf0" @@ -33049,6 +33053,9 @@ static const struct cipher_testvec chacha20_enc_tv_template[] = { "\xa1\xed\xad\xd5\x76\xfa\x24\x8f" "\x98", .rlen = 1281, + .also_non_np = 1, + .np = 3, + .tap = { 1200, 1, 80 }, }, }; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index f7665c31fecaf3f46b64e17a821f9b8528def937..831cdd7d197dbed87d8a55c530af8df50da15419 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3362,7 +3362,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) const char *failure_string; struct binder_buffer *buffer; - if (proc->tsk != current) + if (proc->tsk != current->group_leader) return -EINVAL; if ((vma->vm_end - vma->vm_start) > SZ_4M) diff --git a/drivers/ata/ahci_da850.c b/drivers/ata/ahci_da850.c index 1a50cd3b4233bdf467049f3308c636fe32c0caf4..9b34dff6453633fcc9cf63393eb6fe553af29dbe 100644 --- a/drivers/ata/ahci_da850.c +++ b/drivers/ata/ahci_da850.c @@ -216,12 +216,16 @@ static int ahci_da850_probe(struct platform_device *pdev) return rc; res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!res) + if (!res) { + rc = -ENODEV; goto disable_resources; + } pwrdn_reg = devm_ioremap(dev, res->start, resource_size(res)); - if (!pwrdn_reg) + if (!pwrdn_reg) { + rc = -ENOMEM; goto disable_resources; + } da850_sata_init(dev, pwrdn_reg, hpriv->mmio, mpy); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index fa7dd4394c02b642c00119bd00a4b59b18921a0d..1945a8ea20998490b48aa70dfe56c4fbaad744ea 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2411,6 +2411,9 @@ static void ata_dev_config_trusted(struct ata_device *dev) u64 trusted_cap; unsigned int err; + if (!ata_id_has_trusted(dev->id)) + return; + if (!ata_identify_page_supported(dev, ATA_LOG_SECURITY)) { ata_dev_warn(dev, "Security Log not supported\n"); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ef8334949b4217d9d0f1f964432c16a56ed244c4..f321b96405f55746490b50132f583c0124b192bb 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -221,8 +221,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) } static int -figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit, - loff_t logical_blocksize) +figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); sector_t x = (sector_t)size; @@ -234,12 +233,6 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit, lo->lo_offset = offset; if (lo->lo_sizelimit != sizelimit) lo->lo_sizelimit = sizelimit; - if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) { - lo->lo_logical_blocksize = logical_blocksize; - blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize); - blk_queue_logical_block_size(lo->lo_queue, - lo->lo_logical_blocksize); - } set_capacity(lo->lo_disk, x); bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); /* let user-space know about the new size */ @@ -820,7 +813,6 @@ static void loop_config_discard(struct loop_device *lo) struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; struct request_queue *q = lo->lo_queue; - int lo_bits = 9; /* * We use punch hole to reclaim the free space used by the @@ -840,11 +832,9 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_granularity = inode->i_sb->s_blocksize; q->limits.discard_alignment = 0; - if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) - lo_bits = blksize_bits(lo->lo_logical_blocksize); - blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits); - blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits); + blk_queue_max_discard_sectors(q, UINT_MAX >> 9); + blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); } @@ -938,7 +928,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->use_dio = false; lo->lo_blocksize = lo_blocksize; - lo->lo_logical_blocksize = 512; lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; @@ -1104,7 +1093,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) int err; struct loop_func_table *xfer; kuid_t uid = current_uid(); - int lo_flags = lo->lo_flags; if (lo->lo_encrypt_key_size && !uid_eq(lo->lo_key_owner, uid) && @@ -1137,26 +1125,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (err) goto exit; - if (info->lo_flags & LO_FLAGS_BLOCKSIZE) { - if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE)) - lo->lo_logical_blocksize = 512; - lo->lo_flags |= LO_FLAGS_BLOCKSIZE; - if (LO_INFO_BLOCKSIZE(info) != 512 && - LO_INFO_BLOCKSIZE(info) != 1024 && - LO_INFO_BLOCKSIZE(info) != 2048 && - LO_INFO_BLOCKSIZE(info) != 4096) - return -EINVAL; - if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize) - return -EINVAL; - } - if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit || - lo->lo_flags != lo_flags || - ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) && - lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) { - if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit, - LO_INFO_BLOCKSIZE(info))) { + lo->lo_sizelimit != info->lo_sizelimit) { + if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto exit; } @@ -1348,8 +1319,7 @@ static int loop_set_capacity(struct loop_device *lo) if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; - return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit, - lo->lo_logical_blocksize); + return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); } static int loop_set_dio(struct loop_device *lo, unsigned long arg) diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 2c096b9a17b8ccd756065d5102b293796a5833ad..fecd3f97ef8c7cd9f825e6c58777a1a2bc6f3461 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -49,7 +49,6 @@ struct loop_device { struct file * lo_backing_file; struct block_device *lo_device; unsigned lo_blocksize; - unsigned lo_logical_blocksize; void *key_data; gfp_t old_gfp_mask; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1498b899a593e31951c835f4f537d1bb03d0e596..d3d5523862c227d86166a56fba230467b9b545fc 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -381,6 +381,7 @@ static void virtblk_config_changed_work(struct work_struct *work) struct request_queue *q = vblk->disk->queue; char cap_str_2[10], cap_str_10[10]; char *envp[] = { "RESIZE=1", NULL }; + unsigned long long nblocks; u64 capacity; /* Host must always specify the capacity. */ @@ -393,16 +394,19 @@ static void virtblk_config_changed_work(struct work_struct *work) capacity = (sector_t)-1; } - string_get_size(capacity, queue_logical_block_size(q), + nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9); + + string_get_size(nblocks, queue_logical_block_size(q), STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); - string_get_size(capacity, queue_logical_block_size(q), + string_get_size(nblocks, queue_logical_block_size(q), STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); dev_notice(&vdev->dev, - "new size: %llu %d-byte logical blocks (%s/%s)\n", - (unsigned long long)capacity, - queue_logical_block_size(q), - cap_str_10, cap_str_2); + "new size: %llu %d-byte logical blocks (%s/%s)\n", + nblocks, + queue_logical_block_size(q), + cap_str_10, + cap_str_2); set_capacity(vblk->disk, capacity); revalidate_disk(vblk->disk); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 792da683e70dafafa6f69e224b8e57272d3e6be1..2adb8599be93147bf7e9e89d3d65d43b6305bff0 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -244,6 +244,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) { struct pending_req *req, *n; unsigned int j, r; + bool busy = false; for (r = 0; r < blkif->nr_rings; r++) { struct xen_blkif_ring *ring = &blkif->rings[r]; @@ -261,8 +262,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) * don't have any discard_io or other_io requests. So, checking * for inflight IO is enough. */ - if (atomic_read(&ring->inflight) > 0) - return -EBUSY; + if (atomic_read(&ring->inflight) > 0) { + busy = true; + continue; + } if (ring->irq) { unbind_from_irqhandler(ring->irq, ring); @@ -300,6 +303,9 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); ring->active = false; } + if (busy) + return -EBUSY; + blkif->nr_ring_pages = 0; /* * blkif->rings was allocated in connect_ring, so we should free it in diff --git a/drivers/char/agp/ali-agp.c b/drivers/char/agp/ali-agp.c index dcbbb4ea3cc1d7040a847799fad2415e44a24e08..89527bae4602a906bde52ee056bb9acb0306c01a 100644 --- a/drivers/char/agp/ali-agp.c +++ b/drivers/char/agp/ali-agp.c @@ -381,7 +381,7 @@ static void agp_ali_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -static struct pci_device_id agp_ali_pci_table[] = { +static const struct pci_device_id agp_ali_pci_table[] = { { .class = (PCI_CLASS_BRIDGE_HOST << 8), .class_mask = ~0, diff --git a/drivers/char/agp/amd-k7-agp.c b/drivers/char/agp/amd-k7-agp.c index 5fbd333e4c6d74bbd2e34f25ac74648f9b13a62d..b450544dcaf0fc3c188b3eabfea7319438709ed7 100644 --- a/drivers/char/agp/amd-k7-agp.c +++ b/drivers/char/agp/amd-k7-agp.c @@ -21,7 +21,7 @@ #define AMD_TLBFLUSH 0x0c /* In mmio region (32-bit register) */ #define AMD_CACHEENTRY 0x10 /* In mmio region (32-bit register) */ -static struct pci_device_id agp_amdk7_pci_table[]; +static const struct pci_device_id agp_amdk7_pci_table[]; struct amd_page_map { unsigned long *real; @@ -508,7 +508,7 @@ static int agp_amdk7_resume(struct pci_dev *pdev) #endif /* CONFIG_PM */ /* must be the same order as name table above */ -static struct pci_device_id agp_amdk7_pci_table[] = { +static const struct pci_device_id agp_amdk7_pci_table[] = { { .class = (PCI_CLASS_BRIDGE_HOST << 8), .class_mask = ~0, diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index c99cd19d91479ce62ca1871925fc046f9efa16fe..e50c29c97ca74d20542a176387d3de8ee4780b79 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -610,7 +610,7 @@ static int agp_amd64_resume(struct pci_dev *pdev) #endif /* CONFIG_PM */ -static struct pci_device_id agp_amd64_pci_table[] = { +static const struct pci_device_id agp_amd64_pci_table[] = { { .class = (PCI_CLASS_BRIDGE_HOST << 8), .class_mask = ~0, diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c index 0b5ec7af2414bb924fa1a927aafe0d4115492281..88b4cbee4dac1cf748aa8a5799e7c1654ef5929c 100644 --- a/drivers/char/agp/ati-agp.c +++ b/drivers/char/agp/ati-agp.c @@ -540,7 +540,7 @@ static void agp_ati_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -static struct pci_device_id agp_ati_pci_table[] = { +static const struct pci_device_id agp_ati_pci_table[] = { { .class = (PCI_CLASS_BRIDGE_HOST << 8), .class_mask = ~0, diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c index 533cb6d229b8ed2e974ae94801ec80f5b456a133..7f88490b5479c3f47341712d01f36095c0477b0e 100644 --- a/drivers/char/agp/efficeon-agp.c +++ b/drivers/char/agp/efficeon-agp.c @@ -427,7 +427,7 @@ static int agp_efficeon_resume(struct pci_dev *pdev) } #endif -static struct pci_device_id agp_efficeon_pci_table[] = { +static const struct pci_device_id agp_efficeon_pci_table[] = { { .class = (PCI_CLASS_BRIDGE_HOST << 8), .class_mask = ~0, diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 0a21daed5b6251edd8136bc014d073ab734a2a0a..9e4f27a6cb5afad0034f1636a12a2b775cb36663 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -828,7 +828,7 @@ static int agp_intel_resume(struct pci_dev *pdev) } #endif -static struct pci_device_id agp_intel_pci_table[] = { +static const struct pci_device_id agp_intel_pci_table[] = { #define ID(x) \ { \ .class = (PCI_CLASS_BRIDGE_HOST << 8), \ diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c index 6c8d39cb566e32ea4ac14c818f3bd16962823097..828b34445203fc76138236cf8266f904cab28db4 100644 --- a/drivers/char/agp/nvidia-agp.c +++ b/drivers/char/agp/nvidia-agp.c @@ -420,7 +420,7 @@ static int agp_nvidia_resume(struct pci_dev *pdev) #endif -static struct pci_device_id agp_nvidia_pci_table[] = { +static const struct pci_device_id agp_nvidia_pci_table[] = { { .class = (PCI_CLASS_BRIDGE_HOST << 8), .class_mask = ~0, diff --git a/drivers/char/agp/sis-agp.c b/drivers/char/agp/sis-agp.c index 2c74038da459ac039e0f1490be8e034e8bccda70..14909fc5d767f42d6039a2c58398e38ad58c680e 100644 --- a/drivers/char/agp/sis-agp.c +++ b/drivers/char/agp/sis-agp.c @@ -237,7 +237,7 @@ static int agp_sis_resume(struct pci_dev *pdev) #endif /* CONFIG_PM */ -static struct pci_device_id agp_sis_pci_table[] = { +static const struct pci_device_id agp_sis_pci_table[] = { { .class = (PCI_CLASS_BRIDGE_HOST << 8), .class_mask = ~0, diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c index fdced547ad5961ff722cfb438c0e33de9f5fabb2..c381c8e396fcc0e43b5a1b2a5ebd0c9e80ad732f 100644 --- a/drivers/char/agp/uninorth-agp.c +++ b/drivers/char/agp/uninorth-agp.c @@ -679,7 +679,7 @@ static void agp_uninorth_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -static struct pci_device_id agp_uninorth_pci_table[] = { +static const struct pci_device_id agp_uninorth_pci_table[] = { { .class = (PCI_CLASS_BRIDGE_HOST << 8), .class_mask = ~0, diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 56e0a0e1b6001e516447d1b7a513861951705a2c..9a302799040e4529bc5d08ce7c17343421ea19b6 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -48,7 +48,7 @@ static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(0); */ u64 dma_fence_context_alloc(unsigned num) { - BUG_ON(!num); + WARN_ON(!num); return atomic64_add_return(num, &dma_fence_context_counter) - num; } EXPORT_SYMBOL(dma_fence_context_alloc); @@ -172,7 +172,7 @@ void dma_fence_release(struct kref *kref) trace_dma_fence_destroy(fence); - BUG_ON(!list_empty(&fence->cb_list)); + WARN_ON(!list_empty(&fence->cb_list)); if (fence->ops->release) fence->ops->release(fence); diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 393817e849ed2a662e8696867bb1297dad9c5397..dec3a815455d6712864cc75e18c6f8210cebe06b 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -195,8 +195,7 @@ reservation_object_add_shared_replace(struct reservation_object *obj, if (old) kfree_rcu(old, rcu); - if (old_fence) - dma_fence_put(old_fence); + dma_fence_put(old_fence); } /** @@ -258,11 +257,70 @@ void reservation_object_add_excl_fence(struct reservation_object *obj, dma_fence_put(rcu_dereference_protected(old->shared[i], reservation_object_held(obj))); - if (old_fence) - dma_fence_put(old_fence); + dma_fence_put(old_fence); } EXPORT_SYMBOL(reservation_object_add_excl_fence); +/** +* reservation_object_copy_fences - Copy all fences from src to dst. +* @dst: the destination reservation object +* @src: the source reservation object +* +* Copy all fences from src to dst. Both src->lock as well as dst-lock must be +* held. +*/ +int reservation_object_copy_fences(struct reservation_object *dst, + struct reservation_object *src) +{ + struct reservation_object_list *src_list, *dst_list; + struct dma_fence *old, *new; + size_t size; + unsigned i; + + src_list = reservation_object_get_list(src); + + if (src_list) { + size = offsetof(typeof(*src_list), + shared[src_list->shared_count]); + dst_list = kmalloc(size, GFP_KERNEL); + if (!dst_list) + return -ENOMEM; + + dst_list->shared_count = src_list->shared_count; + dst_list->shared_max = src_list->shared_count; + for (i = 0; i < src_list->shared_count; ++i) + dst_list->shared[i] = + dma_fence_get(src_list->shared[i]); + } else { + dst_list = NULL; + } + + kfree(dst->staged); + dst->staged = NULL; + + src_list = reservation_object_get_list(dst); + + old = reservation_object_get_excl(dst); + new = reservation_object_get_excl(src); + + dma_fence_get(new); + + preempt_disable(); + write_seqcount_begin(&dst->seq); + /* write_seqcount_begin provides the necessary memory barrier */ + RCU_INIT_POINTER(dst->fence_excl, new); + RCU_INIT_POINTER(dst->fence, dst_list); + write_seqcount_end(&dst->seq); + preempt_enable(); + + if (src_list) + kfree_rcu(src_list, rcu); + dma_fence_put(old); + + return 0; +} +EXPORT_SYMBOL(reservation_object_copy_fences); + /** * reservation_object_get_fences_rcu - Get an object's shared and exclusive * fences without update side lock held @@ -373,12 +431,25 @@ long reservation_object_wait_timeout_rcu(struct reservation_object *obj, long ret = timeout ? timeout : 1; retry: - fence = NULL; shared_count = 0; seq = read_seqcount_begin(&obj->seq); rcu_read_lock(); - if (wait_all) { + fence = rcu_dereference(obj->fence_excl); + if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + if (!dma_fence_get_rcu(fence)) + goto unlock_retry; + + if (dma_fence_is_signaled(fence)) { + dma_fence_put(fence); + fence = NULL; + } + + } else { + fence = NULL; + } + + if (!fence && wait_all) { struct reservation_object_list *fobj = rcu_dereference(obj->fence); @@ -405,22 +476,6 @@ long reservation_object_wait_timeout_rcu(struct reservation_object *obj, } } - if (!shared_count) { - struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl); - - if (fence_excl && - !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &fence_excl->flags)) { - if (!dma_fence_get_rcu(fence_excl)) - goto unlock_retry; - - if (dma_fence_is_signaled(fence_excl)) - dma_fence_put(fence_excl); - else - fence = fence_excl; - } - } - rcu_read_unlock(); if (fence) { if (read_seqcount_retry(&obj->seq, seq)) { diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 69c5ff36e2f9e4f57ab72b87da2634332c97fe55..38cc7389a6c172c4f22ac89ec3694220a2083dc1 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -96,9 +96,9 @@ static struct sync_timeline *sync_timeline_create(const char *name) obj->context = dma_fence_context_alloc(1); strlcpy(obj->name, name, sizeof(obj->name)); - INIT_LIST_HEAD(&obj->child_list_head); - INIT_LIST_HEAD(&obj->active_list_head); - spin_lock_init(&obj->child_list_lock); + obj->pt_tree = RB_ROOT; + INIT_LIST_HEAD(&obj->pt_list); + spin_lock_init(&obj->lock); sync_timeline_debug_add(obj); @@ -125,68 +125,6 @@ static void sync_timeline_put(struct sync_timeline *obj) kref_put(&obj->kref, sync_timeline_free); } -/** - * sync_timeline_signal() - signal a status change on a sync_timeline - * @obj: sync_timeline to signal - * @inc: num to increment on timeline->value - * - * A sync implementation should call this any time one of it's fences - * has signaled or has an error condition. - */ -static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) -{ - unsigned long flags; - struct sync_pt *pt, *next; - - trace_sync_timeline(obj); - - spin_lock_irqsave(&obj->child_list_lock, flags); - - obj->value += inc; - - list_for_each_entry_safe(pt, next, &obj->active_list_head, - active_list) { - if (dma_fence_is_signaled_locked(&pt->base)) - list_del_init(&pt->active_list); - } - - spin_unlock_irqrestore(&obj->child_list_lock, flags); -} - -/** - * sync_pt_create() - creates a sync pt - * @parent: fence's parent sync_timeline - * @size: size to allocate for this pt - * @inc: value of the fence - * - * Creates a new sync_pt as a child of @parent. @size bytes will be - * allocated allowing for implementation specific data to be kept after - * the generic sync_timeline struct. Returns the sync_pt object or - * NULL in case of error. - */ -static struct sync_pt *sync_pt_create(struct sync_timeline *obj, int size, - unsigned int value) -{ - unsigned long flags; - struct sync_pt *pt; - - if (size < sizeof(*pt)) - return NULL; - - pt = kzalloc(size, GFP_KERNEL); - if (!pt) - return NULL; - - spin_lock_irqsave(&obj->child_list_lock, flags); - sync_timeline_get(obj); - dma_fence_init(&pt->base, &timeline_fence_ops, &obj->child_list_lock, - obj->context, value); - list_add_tail(&pt->child_list, &obj->child_list_head); - INIT_LIST_HEAD(&pt->active_list); - spin_unlock_irqrestore(&obj->child_list_lock, flags); - return pt; -} - static const char *timeline_fence_get_driver_name(struct dma_fence *fence) { return "sw_sync"; @@ -203,13 +141,17 @@ static void timeline_fence_release(struct dma_fence *fence) { struct sync_pt *pt = dma_fence_to_sync_pt(fence); struct sync_timeline *parent = dma_fence_parent(fence); - unsigned long flags; - spin_lock_irqsave(fence->lock, flags); - list_del(&pt->child_list); - if (!list_empty(&pt->active_list)) - list_del(&pt->active_list); - spin_unlock_irqrestore(fence->lock, flags); + if (!list_empty(&pt->link)) { + unsigned long flags; + + spin_lock_irqsave(fence->lock, flags); + if (!list_empty(&pt->link)) { + list_del(&pt->link); + rb_erase(&pt->node, &parent->pt_tree); + } + spin_unlock_irqrestore(fence->lock, flags); + } sync_timeline_put(parent); dma_fence_free(fence); @@ -219,18 +161,11 @@ static bool timeline_fence_signaled(struct dma_fence *fence) { struct sync_timeline *parent = dma_fence_parent(fence); - return (fence->seqno > parent->value) ? false : true; + return !__dma_fence_is_later(fence->seqno, parent->value); } static bool timeline_fence_enable_signaling(struct dma_fence *fence) { - struct sync_pt *pt = dma_fence_to_sync_pt(fence); - struct sync_timeline *parent = dma_fence_parent(fence); - - if (timeline_fence_signaled(fence)) - return false; - - list_add_tail(&pt->active_list, &parent->active_list_head); return true; } @@ -259,6 +194,107 @@ static const struct dma_fence_ops timeline_fence_ops = { .timeline_value_str = timeline_fence_timeline_value_str, }; +/** + * sync_timeline_signal() - signal a status change on a sync_timeline + * @obj: sync_timeline to signal + * @inc: num to increment on timeline->value + * + * A sync implementation should call this any time one of it's fences + * has signaled or has an error condition. + */ +static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) +{ + struct sync_pt *pt, *next; + + trace_sync_timeline(obj); + + spin_lock_irq(&obj->lock); + + obj->value += inc; + + list_for_each_entry_safe(pt, next, &obj->pt_list, link) { + if (!timeline_fence_signaled(&pt->base)) + break; + + list_del_init(&pt->link); + rb_erase(&pt->node, &obj->pt_tree); + + /* + * A signal callback may release the last reference to this + * fence, causing it to be freed. That operation has to be + * last to avoid a use after free inside this loop, and must + * be after we remove the fence from the timeline in order to + * prevent deadlocking on timeline->lock inside + * timeline_fence_release(). + */ + dma_fence_signal_locked(&pt->base); + } + + spin_unlock_irq(&obj->lock); +} + +/** + * sync_pt_create() - creates a sync pt + * @parent: fence's parent sync_timeline + * @inc: value of the fence + * + * Creates a new sync_pt as a child of @parent. @size bytes will be + * allocated allowing for implementation specific data to be kept after + * the generic sync_timeline struct. Returns the sync_pt object or + * NULL in case of error. + */ +static struct sync_pt *sync_pt_create(struct sync_timeline *obj, + unsigned int value) +{ + struct sync_pt *pt; + + pt = kzalloc(sizeof(*pt), GFP_KERNEL); + if (!pt) + return NULL; + + sync_timeline_get(obj); + dma_fence_init(&pt->base, &timeline_fence_ops, &obj->lock, + obj->context, value); + INIT_LIST_HEAD(&pt->link); + + spin_lock_irq(&obj->lock); + if (!dma_fence_is_signaled_locked(&pt->base)) { + struct rb_node **p = &obj->pt_tree.rb_node; + struct rb_node *parent = NULL; + + while (*p) { + struct sync_pt *other; + int cmp; + + parent = *p; + other = rb_entry(parent, typeof(*pt), node); + cmp = value - other->base.seqno; + if (cmp > 0) { + p = &parent->rb_right; + } else if (cmp < 0) { + p = &parent->rb_left; + } else { + if (dma_fence_get_rcu(&other->base)) { + dma_fence_put(&pt->base); + pt = other; + goto unlock; + } + p = &parent->rb_left; + } + } + rb_link_node(&pt->node, parent, p); + rb_insert_color(&pt->node, &obj->pt_tree); + + parent = rb_next(&pt->node); + list_add_tail(&pt->link, + parent ? &rb_entry(parent, typeof(*pt), node)->link : &obj->pt_list); + } +unlock: + spin_unlock_irq(&obj->lock); + + return pt; +} + /* * *WARNING* * @@ -309,7 +345,7 @@ static long sw_sync_ioctl_create_fence(struct sync_timeline *obj, goto err; } - pt = sync_pt_create(obj, sizeof(*pt), data.value); + pt = sync_pt_create(obj, data.value); if (!pt) { err = -ENOMEM; goto err; @@ -345,6 +381,11 @@ static long sw_sync_ioctl_inc(struct sync_timeline *obj, unsigned long arg) if (copy_from_user(&value, (void __user *)arg, sizeof(value))) return -EFAULT; + while (value > INT_MAX) { + sync_timeline_signal(obj, INT_MAX); + value -= INT_MAX; + } + sync_timeline_signal(obj, value); return 0; diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index 59a3b2f8ee91049e34cb5c12392ed1c44c52b212..c4c8ecb24aa9b4e9eb233847dd9a7bbfa1cb7fe4 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -116,17 +116,15 @@ static void sync_print_fence(struct seq_file *s, static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) { struct list_head *pos; - unsigned long flags; seq_printf(s, "%s: %d\n", obj->name, obj->value); - spin_lock_irqsave(&obj->child_list_lock, flags); - list_for_each(pos, &obj->child_list_head) { - struct sync_pt *pt = - container_of(pos, struct sync_pt, child_list); + spin_lock_irq(&obj->lock); + list_for_each(pos, &obj->pt_list) { + struct sync_pt *pt = container_of(pos, struct sync_pt, link); sync_print_fence(s, &pt->base, false); } - spin_unlock_irqrestore(&obj->child_list_lock, flags); + spin_unlock_irq(&obj->lock); } static void sync_print_sync_file(struct seq_file *s, @@ -151,12 +149,11 @@ static void sync_print_sync_file(struct seq_file *s, static int sync_debugfs_show(struct seq_file *s, void *unused) { - unsigned long flags; struct list_head *pos; seq_puts(s, "objs:\n--------------\n"); - spin_lock_irqsave(&sync_timeline_list_lock, flags); + spin_lock_irq(&sync_timeline_list_lock); list_for_each(pos, &sync_timeline_list_head) { struct sync_timeline *obj = container_of(pos, struct sync_timeline, @@ -165,11 +162,11 @@ static int sync_debugfs_show(struct seq_file *s, void *unused) sync_print_obj(s, obj); seq_putc(s, '\n'); } - spin_unlock_irqrestore(&sync_timeline_list_lock, flags); + spin_unlock_irq(&sync_timeline_list_lock); seq_puts(s, "fences:\n--------------\n"); - spin_lock_irqsave(&sync_file_list_lock, flags); + spin_lock_irq(&sync_file_list_lock); list_for_each(pos, &sync_file_list_head) { struct sync_file *sync_file = container_of(pos, struct sync_file, sync_file_list); @@ -177,7 +174,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused) sync_print_sync_file(s, sync_file); seq_putc(s, '\n'); } - spin_unlock_irqrestore(&sync_file_list_lock, flags); + spin_unlock_irq(&sync_file_list_lock); return 0; } diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index 26fe8b9907b38aac09d786997bd02a4e8be1706a..d615a89f774c249e2b779a82436561822e863bbc 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -14,6 +14,7 @@ #define _LINUX_SYNC_H #include +#include #include #include @@ -24,42 +25,41 @@ * struct sync_timeline - sync object * @kref: reference count on fence. * @name: name of the sync_timeline. Useful for debugging - * @child_list_head: list of children sync_pts for this sync_timeline - * @child_list_lock: lock protecting @child_list_head and fence.status - * @active_list_head: list of active (unsignaled/errored) sync_pts + * @lock: lock protecting @pt_list and @value + * @pt_tree: rbtree of active (unsignaled/errored) sync_pts + * @pt_list: list of active (unsignaled/errored) sync_pts * @sync_timeline_list: membership in global sync_timeline_list */ struct sync_timeline { struct kref kref; char name[32]; - /* protected by child_list_lock */ + /* protected by lock */ u64 context; int value; - struct list_head child_list_head; - spinlock_t child_list_lock; - - struct list_head active_list_head; + struct rb_root pt_tree; + struct list_head pt_list; + spinlock_t lock; struct list_head sync_timeline_list; }; static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence) { - return container_of(fence->lock, struct sync_timeline, child_list_lock); + return container_of(fence->lock, struct sync_timeline, lock); } /** * struct sync_pt - sync_pt object * @base: base fence object - * @child_list: sync timeline child's list - * @active_list: sync timeline active child's list + * @link: link on the sync timeline's list + * @node: node in the sync timeline's tree */ struct sync_pt { struct dma_fence base; - struct list_head child_list; - struct list_head active_list; + struct list_head link; + struct rb_node node; }; #ifdef CONFIG_SW_SYNC diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index b10cbaa82ff537b1cfb68d93d80aecdfebcd6c22..b26256f23d67fbdf58206adf2a253fcf40c50091 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -717,8 +717,8 @@ static int tegra_adma_probe(struct platform_device *pdev) tdc->chan_addr = tdma->base_addr + ADMA_CH_REG_OFFSET(i); tdc->irq = of_irq_get(pdev->dev.of_node, i); - if (tdc->irq < 0) { - ret = tdc->irq; + if (tdc->irq <= 0) { + ret = tdc->irq ?: -ENXIO; goto irq_dispose; } diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 24a066e1841cc76104ff9b7ada90f50c865c1aa8..a8acc197dec37760f6e6b6c44eec0abda5e97e0c 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -33,7 +33,7 @@ drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \ drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \ drm_kms_helper_common.o drm_dp_dual_mode_helper.o \ drm_simple_kms_helper.o drm_modeset_helper.o \ - drm_scdc_helper.o + drm_scdc_helper.o drm_gem_framebuffer_helper.o drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index faea6349228fe4a566464eaa467e1a77eaf5fd4d..658bac0cdc5e96094499a13c2183de9f56e6f12f 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_queue_mgr.o + amdgpu_queue_mgr.o amdgpu_vf_error.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ff7bf1a9f96780408027d4273101666df7335522..12e71bbfd2228ebeaccd9188142c276653edb938 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -68,13 +68,16 @@ #include "gpu_scheduler.h" #include "amdgpu_virt.h" +#include "amdgpu_gart.h" /* * Modules parameters. */ extern int amdgpu_modeset; extern int amdgpu_vram_limit; -extern int amdgpu_gart_size; +extern int amdgpu_vis_vram_limit; +extern unsigned amdgpu_gart_size; +extern int amdgpu_gtt_size; extern int amdgpu_moverate; extern int amdgpu_benchmarking; extern int amdgpu_testing; @@ -93,6 +96,7 @@ extern int amdgpu_bapm; extern int amdgpu_deep_color; extern int amdgpu_vm_size; extern int amdgpu_vm_block_size; +extern int amdgpu_vm_fragment_size; extern int amdgpu_vm_fault_stop; extern int amdgpu_vm_debug; extern int amdgpu_vm_update_mode; @@ -104,6 +108,7 @@ extern unsigned amdgpu_pcie_gen_cap; extern unsigned amdgpu_pcie_lane_cap; extern unsigned amdgpu_cg_mask; extern unsigned amdgpu_pg_mask; +extern unsigned amdgpu_sdma_phase_quantum; extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern unsigned amdgpu_pp_feature_mask; @@ -369,78 +374,10 @@ struct amdgpu_clock { }; /* - * BO. + * GEM. */ -struct amdgpu_bo_list_entry { - struct amdgpu_bo *robj; - struct ttm_validate_buffer tv; - struct amdgpu_bo_va *bo_va; - uint32_t priority; - struct page **user_pages; - int user_invalidated; -}; - -struct amdgpu_bo_va_mapping { - struct list_head list; - struct rb_node rb; - uint64_t start; - uint64_t last; - uint64_t __subtree_last; - uint64_t offset; - uint64_t flags; -}; - -/* bo virtual addresses in a specific vm */ -struct amdgpu_bo_va { - /* protected by bo being reserved */ - struct list_head bo_list; - struct dma_fence *last_pt_update; - unsigned ref_count; - - /* protected by vm mutex and spinlock */ - struct list_head vm_status; - - /* mappings for this bo_va */ - struct list_head invalids; - struct list_head valids; - - /* constant after initialization */ - struct amdgpu_vm *vm; - struct amdgpu_bo *bo; -}; #define AMDGPU_GEM_DOMAIN_MAX 0x3 - -struct amdgpu_bo { - /* Protected by tbo.reserved */ - u32 prefered_domains; - u32 allowed_domains; - struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; - struct ttm_placement placement; - struct ttm_buffer_object tbo; - struct ttm_bo_kmap_obj kmap; - u64 flags; - unsigned pin_count; - void *kptr; - u64 tiling_flags; - u64 metadata_flags; - void *metadata; - u32 metadata_size; - unsigned prime_shared_count; - /* list of all virtual address to which this bo - * is associated to - */ - struct list_head va; - /* Constant after initialization */ - struct drm_gem_object gem_base; - struct amdgpu_bo *parent; - struct amdgpu_bo *shadow; - - struct ttm_bo_kmap_obj dma_buf_vmap; - struct amdgpu_mn *mn; - struct list_head mn_list; - struct list_head shadow_list; -}; #define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base) void amdgpu_gem_object_free(struct drm_gem_object *obj); @@ -531,49 +468,6 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp, int amdgpu_fence_slab_init(void); void amdgpu_fence_slab_fini(void); -/* - * GART structures, functions & helpers - */ -struct amdgpu_mc; - -#define AMDGPU_GPU_PAGE_SIZE 4096 -#define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1) -#define AMDGPU_GPU_PAGE_SHIFT 12 -#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK) - -struct amdgpu_gart { - dma_addr_t table_addr; - struct amdgpu_bo *robj; - void *ptr; - unsigned num_gpu_pages; - unsigned num_cpu_pages; - unsigned table_size; -#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS - struct page **pages; -#endif - bool ready; - - /* Asic default pte flags */ - uint64_t gart_pte_flags; - - const struct amdgpu_gart_funcs *gart_funcs; -}; - -int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); -void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); -int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); -void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); -int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); -void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); -int amdgpu_gart_init(struct amdgpu_device *adev); -void amdgpu_gart_fini(struct amdgpu_device *adev); -int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, - int pages); -int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, - int pages, struct page **pagelist, - dma_addr_t *dma_addr, uint64_t flags); -int amdgpu_ttm_recover_gart(struct amdgpu_device *adev); - /* * VMHUB structures, functions & helpers */ @@ -598,22 +492,20 @@ struct amdgpu_mc { * about vram size near mc fb location */ u64 mc_vram_size; u64 visible_vram_size; - u64 gtt_size; - u64 gtt_start; - u64 gtt_end; + u64 gart_size; + u64 gart_start; + u64 gart_end; u64 vram_start; u64 vram_end; unsigned vram_width; u64 real_vram_size; int vram_mtrr; - u64 gtt_base_align; u64 mc_mask; const struct firmware *fw; /* MC firmware */ uint32_t fw_version; struct amdgpu_irq_src vm_fault; uint32_t vram_type; uint32_t srbm_soft_reset; - struct amdgpu_mode_mc_save save; bool prt_warning; uint64_t stolen_size; /* apertures */ @@ -719,15 +611,15 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT /* overlap the doorbell assignment with VCN as they are mutually exclusive * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD */ - AMDGPU_DOORBELL64_RING0_1 = 0xF8, - AMDGPU_DOORBELL64_RING2_3 = 0xF9, - AMDGPU_DOORBELL64_RING4_5 = 0xFA, - AMDGPU_DOORBELL64_RING6_7 = 0xFB, + AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8, + AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9, + AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA, + AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB, - AMDGPU_DOORBELL64_UVD_RING0_1 = 0xFC, - AMDGPU_DOORBELL64_UVD_RING2_3 = 0xFD, - AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFE, - AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFF, + AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC, + AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD, + AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE, + AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF, AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, AMDGPU_DOORBELL64_INVALID = 0xFFFF @@ -857,6 +749,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); struct amdgpu_fpriv { struct amdgpu_vm vm; struct amdgpu_bo_va *prt_va; + struct amdgpu_bo_va *csa_va; struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; @@ -866,6 +759,14 @@ struct amdgpu_fpriv { /* * residency list */ +struct amdgpu_bo_list_entry { + struct amdgpu_bo *robj; + struct ttm_validate_buffer tv; + struct amdgpu_bo_va *bo_va; + uint32_t priority; + struct page **user_pages; + int user_invalidated; +}; struct amdgpu_bo_list { struct mutex lock; @@ -1159,7 +1060,9 @@ struct amdgpu_cs_parser { struct list_head validated; struct dma_fence *fence; uint64_t bytes_moved_threshold; + uint64_t bytes_moved_vis_threshold; uint64_t bytes_moved; + uint64_t bytes_moved_vis; struct amdgpu_bo_list_entry *evictable; /* user fence */ @@ -1230,8 +1133,6 @@ struct amdgpu_wb { int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb); void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb); -int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb); -void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb); void amdgpu_get_pcie_info(struct amdgpu_device *adev); @@ -1525,7 +1426,7 @@ struct amdgpu_device { bool is_atom_fw; uint8_t *bios; uint32_t bios_size; - struct amdgpu_bo *stollen_vga_memory; + struct amdgpu_bo *stolen_vga_memory; uint32_t bios_scratch_reg_offset; uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; @@ -1557,6 +1458,10 @@ struct amdgpu_device { spinlock_t gc_cac_idx_lock; amdgpu_rreg_t gc_cac_rreg; amdgpu_wreg_t gc_cac_wreg; + /* protects concurrent se_cac register access */ + spinlock_t se_cac_idx_lock; + amdgpu_rreg_t se_cac_rreg; + amdgpu_wreg_t se_cac_wreg; /* protects concurrent ENDPOINT (audio) register access */ spinlock_t audio_endpt_idx_lock; amdgpu_block_rreg_t audio_endpt_rreg; @@ -1579,9 +1484,6 @@ struct amdgpu_device { struct amdgpu_mman mman; struct amdgpu_vram_scratch vram_scratch; struct amdgpu_wb wb; - atomic64_t vram_usage; - atomic64_t vram_vis_usage; - atomic64_t gtt_usage; atomic64_t num_bytes_moved; atomic64_t num_evictions; atomic64_t num_vram_cpu_page_faults; @@ -1593,6 +1495,7 @@ struct amdgpu_device { spinlock_t lock; s64 last_update_us; s64 accum_us; /* accumulated microseconds */ + s64 accum_us_vis; /* for visible VRAM */ u32 log2_max_MBps; } mm_stats; @@ -1687,6 +1590,8 @@ struct amdgpu_device { bool has_hw_reset; u8 reset_magic[AMDGPU_RESET_MAGIC_NUM]; + /* record last mm index being written through WREG32*/ + unsigned long last_mm_index; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -1742,6 +1647,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); #define WREG32_DIDT(reg, v) adev->didt_wreg(adev, (reg), (v)) #define RREG32_GC_CAC(reg) adev->gc_cac_rreg(adev, (reg)) #define WREG32_GC_CAC(reg, v) adev->gc_cac_wreg(adev, (reg), (v)) +#define RREG32_SE_CAC(reg) adev->se_cac_rreg(adev, (reg)) +#define WREG32_SE_CAC(reg, v) adev->se_cac_wreg(adev, (reg), (v)) #define RREG32_AUDIO_ENDPT(block, reg) adev->audio_endpt_rreg(adev, (block), (reg)) #define WREG32_AUDIO_ENDPT(block, reg, v) adev->audio_endpt_wreg(adev, (block), (reg), (v)) #define WREG32_P(reg, val, mask) \ @@ -1792,50 +1699,6 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); #define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8)) #define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16)) -/* - * RING helpers. - */ -static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) -{ - if (ring->count_dw <= 0) - DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); - ring->ring[ring->wptr++ & ring->buf_mask] = v; - ring->wptr &= ring->ptr_mask; - ring->count_dw--; -} - -static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *src, int count_dw) -{ - unsigned occupied, chunk1, chunk2; - void *dst; - - if (unlikely(ring->count_dw < count_dw)) { - DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); - return; - } - - occupied = ring->wptr & ring->buf_mask; - dst = (void *)&ring->ring[occupied]; - chunk1 = ring->buf_mask + 1 - occupied; - chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; - chunk2 = count_dw - chunk1; - chunk1 <<= 2; - chunk2 <<= 2; - - if (chunk1) - memcpy(dst, src, chunk1); - - if (chunk2) { - src += chunk1; - dst = (void *)ring->ring; - memcpy(dst, src, chunk2); - } - - ring->wptr += count_dw; - ring->wptr &= ring->ptr_mask; - ring->count_dw -= count_dw; -} - static inline struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring) { @@ -1898,7 +1761,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev)) #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) -#define amdgpu_display_set_vga_render_state(adev, r) (adev)->mode_info.funcs->set_vga_render_state((adev), (r)) #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc)) #define amdgpu_display_vblank_wait(adev, crtc) (adev)->mode_info.funcs->vblank_wait((adev), (crtc)) #define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l)) @@ -1911,8 +1773,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos)) #define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c)) #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) -#define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s)) -#define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s)) #define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) @@ -1927,7 +1787,8 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev); bool amdgpu_need_post(struct amdgpu_device *adev); void amdgpu_update_display_priority(struct amdgpu_device *adev); -void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes); +void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, + u64 num_vis_bytes); void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); @@ -1943,7 +1804,7 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem); void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base); -void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc); +void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc); void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size); int amdgpu_ttm_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 06879d1dcabd320045560a35edb8f606a35df46a..a52795d9b45852a371fed81a8f17a51d63bab120 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -285,19 +285,20 @@ static int acp_hw_init(void *handle) return 0; else if (r) return r; + if (adev->asic_type != CHIP_STONEY) { + adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); + if (adev->acp.acp_genpd == NULL) + return -ENOMEM; - adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); - if (adev->acp.acp_genpd == NULL) - return -ENOMEM; - - adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; - adev->acp.acp_genpd->gpd.power_off = acp_poweroff; - adev->acp.acp_genpd->gpd.power_on = acp_poweron; + adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; + adev->acp.acp_genpd->gpd.power_off = acp_poweroff; + adev->acp.acp_genpd->gpd.power_on = acp_poweron; - adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device; + adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device; - pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); + pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); + } adev->acp.acp_cell = kzalloc(sizeof(struct mfd_cell) * ACP_DEVS, GFP_KERNEL); @@ -319,14 +320,29 @@ static int acp_hw_init(void *handle) return -ENOMEM; } - i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + } i2s_pdata[0].cap = DWC_I2S_PLAY; i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1 | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1; + } - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1; i2s_pdata[1].cap = DWC_I2S_RECORD; i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; @@ -373,12 +389,14 @@ static int acp_hw_init(void *handle) if (r) return r; - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); - if (r) { - dev_err(dev, "Failed to add dev to genpd\n"); - return r; + if (adev->asic_type != CHIP_STONEY) { + for (i = 0; i < ACP_DEVS ; i++) { + dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); + r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); + if (r) { + dev_err(dev, "Failed to add dev to genpd\n"); + return r; + } } } @@ -398,20 +416,22 @@ static int acp_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* return early if no ACP */ - if (!adev->acp.acp_genpd) + if (!adev->acp.acp_cell) return 0; - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev); - /* If removal fails, dont giveup and try rest */ - if (ret) - dev_err(dev, "remove dev from genpd failed\n"); + if (adev->acp.acp_genpd) { + for (i = 0; i < ACP_DEVS ; i++) { + dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); + ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev); + /* If removal fails, dont giveup and try rest */ + if (ret) + dev_err(dev, "remove dev from genpd failed\n"); + } + kfree(adev->acp.acp_genpd); } mfd_remove_devices(adev->acp.parent); kfree(adev->acp.acp_res); - kfree(adev->acp.acp_genpd); kfree(adev->acp.acp_cell); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index ef79551b4cb7acde9467cdb7bcefcf64a3561f39..57afad79f55d086b673cd6c61c688d532620a033 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -30,10 +30,10 @@ #include #include #include "amdgpu.h" +#include "amdgpu_pm.h" #include "amd_acpi.h" #include "atom.h" -extern void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev); /* Call the ATIF method */ /** @@ -289,7 +289,7 @@ static int amdgpu_atif_get_sbios_requests(acpi_handle handle, * handles it. * Returns NOTIFY code */ -int amdgpu_atif_handler(struct amdgpu_device *adev, +static int amdgpu_atif_handler(struct amdgpu_device *adev, struct acpi_bus_event *event) { struct amdgpu_atif *atif = &adev->atif; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 37971d9402e3f206910e99dd29e8a72a7b4f93b1..5432af39a674a9b2d3c38039e17433592c4bea86 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -27,16 +27,15 @@ #include "amdgpu_gfx.h" #include -const struct kfd2kgd_calls *kfd2kgd; const struct kgd2kfd_calls *kgd2kfd; -bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); +bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); int amdgpu_amdkfd_init(void) { int ret; #if defined(CONFIG_HSA_AMD_MODULE) - int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**); + int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); kgd2kfd_init_p = symbol_request(kgd2kfd_init); @@ -61,8 +60,21 @@ int amdgpu_amdkfd_init(void) return ret; } -bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev) +void amdgpu_amdkfd_fini(void) +{ + if (kgd2kfd) { + kgd2kfd->exit(); + symbol_put(kgd2kfd_init); + } +} + +void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) { + const struct kfd2kgd_calls *kfd2kgd; + + if (!kgd2kfd) + return; + switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: @@ -73,25 +85,12 @@ bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev) kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; default: - return false; + dev_info(adev->dev, "kfd not supported on this ASIC\n"); + return; } - return true; -} - -void amdgpu_amdkfd_fini(void) -{ - if (kgd2kfd) { - kgd2kfd->exit(); - symbol_put(kgd2kfd_init); - } -} - -void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) -{ - if (kgd2kfd) - adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, - adev->pdev, kfd2kgd); + adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, + adev->pdev, kfd2kgd); } void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) @@ -184,7 +183,8 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, return -ENOMEM; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); + AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0, + &(*mem)->bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 73f83a10ae1485dac5adcfe66358757549ff05d4..8d689ab7e4291fad585448c6e679aa32556facff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -26,6 +26,7 @@ #define AMDGPU_AMDKFD_H_INCLUDED #include +#include #include struct amdgpu_device; @@ -39,8 +40,6 @@ struct kgd_mem { int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); -bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev); - void amdgpu_amdkfd_suspend(struct amdgpu_device *adev); int amdgpu_amdkfd_resume(struct amdgpu_device *adev); void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, @@ -62,4 +61,19 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); +#define read_user_wptr(mmptr, wptr, dst) \ + ({ \ + bool valid = false; \ + if ((mmptr) && (wptr)) { \ + if ((mmptr) == current->mm) { \ + valid = !get_user((dst), (wptr)); \ + } else if (current->mm == NULL) { \ + use_mm(mmptr); \ + valid = !get_user((dst), (wptr)); \ + unuse_mm(mmptr); \ + } \ + } \ + valid; \ + }) + #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 5254562fd0f9fd3f8489ca563fe12eabe2290689..b9dbbf9cb8b07eea5a367822131e249dd17f1e94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -39,6 +39,12 @@ #include "gmc/gmc_7_1_sh_mask.h" #include "cik_structs.h" +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES +}; + enum { MAX_TRAPID = 8, /* 3 bits in the bitfield. */ MAX_WATCH_ADDRESSES = 4 @@ -96,12 +102,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr); + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id); static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); @@ -126,6 +135,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); + +/* Because of REG_GET_FIELD() being used, we put this function in the + * asic specific file. + */ +static int get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + config->gb_addr_config = adev->gfx.config.gb_addr_config; + config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFBANK); + config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFRANKS); + + config->tile_config_ptr = adev->gfx.config.tile_mode_array; + config->num_tile_configs = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + config->macro_tile_config_ptr = + adev->gfx.config.macrotile_mode_array; + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + + return 0; +} static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, @@ -150,7 +186,9 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, .write_vmid_invalidate_request = write_vmid_invalidate_request, - .get_fw_version = get_fw_version + .get_fw_version = get_fw_version, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = get_tile_config, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -186,7 +224,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, queue_id, 0); @@ -290,20 +328,38 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) } static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr) + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t wptr_shadow, is_wptr_shadow_valid; struct cik_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, wptr_val, data; m = get_mqd(mqd); - is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); - if (is_wptr_shadow_valid) - m->cp_hqd_pq_wptr = wptr_shadow; - acquire_queue(kgd, pipe_id, queue_id); - gfx_v7_0_mqd_commit(adev, m); + + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++) + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); + + /* Copy userspace write pointer value to register. + * Activate doorbell logic to monitor subsequent changes. + */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); + + if (read_user_wptr(mm, wptr, wptr_val)) + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(mmCP_HQD_ACTIVE, data); + release_queue(kgd); return 0; @@ -382,30 +438,99 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t temp; - int timeout = utimeout; + enum hqd_dequeue_request_type type; + unsigned long flags, end_jiffies; + int retry; acquire_queue(kgd, pipe_id, queue_id); WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); - WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + + /* Workaround: If IQ timer is active and the wait time is close to or + * equal to 0, dequeueing is not safe. Wait until either the wait time + * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is + * cleared before continuing. Also, ensure wait times are set to at + * least 0x3. + */ + local_irq_save(flags); + preempt_disable(); + retry = 5000; /* wait for 500 usecs at maximum */ + while (true) { + temp = RREG32(mmCP_HQD_IQ_TIMER); + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { + pr_debug("HW is processing IQ\n"); + goto loop; + } + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) + == 3) /* SEM-rearm is safe */ + break; + /* Wait time 3 is safe for CP, but our MMIO read/write + * time is close to 1 microsecond, so check for 10 to + * leave more buffer room + */ + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) + >= 10) + break; + pr_debug("IQ timer is active\n"); + } else + break; +loop: + if (!retry) { + pr_err("CP HQD IQ timer status time out\n"); + break; + } + ndelay(100); + --retry; + } + retry = 1000; + while (true) { + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) + break; + pr_debug("Dequeue request is pending\n"); + if (!retry) { + pr_err("CP HQD dequeue request time out\n"); + break; + } + ndelay(100); + --retry; + } + local_irq_restore(flags); + preempt_enable(); + + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { temp = RREG32(mmCP_HQD_ACTIVE); - if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) break; - if (timeout <= 0) { - pr_err("kfd: cp queue preemption time out.\n"); + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue preemption time out\n"); release_queue(kgd); return -ETIME; } - msleep(20); - timeout -= 20; + usleep_range(500, 1000); } release_queue(kgd); @@ -556,6 +681,16 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + lock_srbm(kgd, 0, 0, 0, vmid); + WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); + unlock_srbm(kgd); +} + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; @@ -566,42 +701,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw->data; break; case KGD_ENGINE_ME: hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; + adev->gfx.me_fw->data; break; case KGD_ENGINE_CE: hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; + adev->gfx.ce_fw->data; break; case KGD_ENGINE_MEC1: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; + adev->gfx.mec_fw->data; break; case KGD_ENGINE_MEC2: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw->data; break; case KGD_ENGINE_RLC: hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw->data; break; case KGD_ENGINE_SDMA1: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; + adev->sdma.instance[0].fw->data; break; case KGD_ENGINE_SDMA2: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; + adev->sdma.instance[1].fw->data; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 133d06671e46cb48461cfc061b0be8a7c0c98b41..fb6e5dbd5a03558d4ad06049b7fe581f18a801ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -39,6 +39,12 @@ #include "vi_structs.h" #include "vid.h" +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES +}; + struct cik_sdma_rlc_registers; /* @@ -55,12 +61,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr); + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd); static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id); static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, @@ -85,6 +94,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); + +/* Because of REG_GET_FIELD() being used, we put this function in the + * asic specific file. + */ +static int get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + config->gb_addr_config = adev->gfx.config.gb_addr_config; + config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFBANK); + config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFRANKS); + + config->tile_config_ptr = adev->gfx.config.tile_mode_array; + config->num_tile_configs = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + config->macro_tile_config_ptr = + adev->gfx.config.macrotile_mode_array; + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + + return 0; +} static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, @@ -111,12 +147,15 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, .write_vmid_invalidate_request = write_vmid_invalidate_request, - .get_fw_version = get_fw_version + .get_fw_version = get_fw_version, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = get_tile_config, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) { return (struct kfd2kgd_calls *)&kfd2kgd; + return (struct kfd2kgd_calls *)&kfd2kgd; } static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) @@ -147,7 +186,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, queue_id, 0); @@ -216,7 +255,7 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) uint32_t mec; uint32_t pipe; - mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, 0, 0); @@ -244,20 +283,67 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) } static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr) + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) { - struct vi_mqd *m; - uint32_t shadow_wptr, valid_wptr; struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct vi_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, wptr_val, data; m = get_mqd(mqd); - valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr)); - if (valid_wptr == 0) - m->cp_hqd_pq_wptr = shadow_wptr; - acquire_queue(kgd, pipe_id, queue_id); - gfx_v8_0_mqd_commit(adev, mqd); + + /* HIQ is set during driver init period with vmid set to 0*/ + if (m->cp_hqd_vmid == 0) { + uint32_t value, mec, pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + value = RREG32(mmRLC_CP_SCHEDULERS); + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); + WREG32(mmRLC_CP_SCHEDULERS, value); + } + + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + + for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++) + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); + + /* Tonga errata: EOP RPTR/WPTR should be left unmodified. + * This is safe since EOP RPTR==WPTR for any inactive HQD + * on ASICs that do not support context-save. + * EOP writes/reads can start anywhere in the ring. + */ + if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) { + WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); + WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); + WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); + } + + for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++) + WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]); + + /* Copy userspace write pointer value to register. + * Activate doorbell logic to monitor subsequent changes. + */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data); + + if (read_user_wptr(mm, wptr, wptr_val)) + WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(mmCP_HQD_ACTIVE, data); + release_queue(kgd); return 0; @@ -308,29 +394,102 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t temp; - int timeout = utimeout; + enum hqd_dequeue_request_type type; + unsigned long flags, end_jiffies; + int retry; + struct vi_mqd *m = get_mqd(mqd); acquire_queue(kgd, pipe_id, queue_id); - WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type); + if (m->cp_hqd_vmid == 0) + WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0); + + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + /* Workaround: If IQ timer is active and the wait time is close to or + * equal to 0, dequeueing is not safe. Wait until either the wait time + * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is + * cleared before continuing. Also, ensure wait times are set to at + * least 0x3. + */ + local_irq_save(flags); + preempt_disable(); + retry = 5000; /* wait for 500 usecs at maximum */ + while (true) { + temp = RREG32(mmCP_HQD_IQ_TIMER); + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { + pr_debug("HW is processing IQ\n"); + goto loop; + } + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) + == 3) /* SEM-rearm is safe */ + break; + /* Wait time 3 is safe for CP, but our MMIO read/write + * time is close to 1 microsecond, so check for 10 to + * leave more buffer room + */ + if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) + >= 10) + break; + pr_debug("IQ timer is active\n"); + } else + break; +loop: + if (!retry) { + pr_err("CP HQD IQ timer status time out\n"); + break; + } + ndelay(100); + --retry; + } + retry = 1000; + while (true) { + temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); + if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) + break; + pr_debug("Dequeue request is pending\n"); + + if (!retry) { + pr_err("CP HQD dequeue request time out\n"); + break; + } + ndelay(100); + --retry; + } + local_irq_restore(flags); + preempt_enable(); + + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { temp = RREG32(mmCP_HQD_ACTIVE); - if (temp & CP_HQD_ACTIVE__ACTIVE_MASK) + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) break; - if (timeout <= 0) { - pr_err("kfd: cp queue preemption time out.\n"); + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue preemption time out.\n"); release_queue(kgd); return -ETIME; } - msleep(20); - timeout -= 20; + usleep_range(500, 1000); } release_queue(kgd); @@ -444,6 +603,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, return 0; } +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + lock_srbm(kgd, 0, 0, 0, vmid); + WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va); + unlock_srbm(kgd); +} + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; @@ -454,42 +623,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) - adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw->data; break; case KGD_ENGINE_ME: hdr = (const union amdgpu_firmware_header *) - adev->gfx.me_fw->data; + adev->gfx.me_fw->data; break; case KGD_ENGINE_CE: hdr = (const union amdgpu_firmware_header *) - adev->gfx.ce_fw->data; + adev->gfx.ce_fw->data; break; case KGD_ENGINE_MEC1: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec_fw->data; + adev->gfx.mec_fw->data; break; case KGD_ENGINE_MEC2: hdr = (const union amdgpu_firmware_header *) - adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw->data; break; case KGD_ENGINE_RLC: hdr = (const union amdgpu_firmware_header *) - adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw->data; break; case KGD_ENGINE_SDMA1: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[0].fw->data; + adev->sdma.instance[0].fw->data; break; case KGD_ENGINE_SDMA2: hdr = (const union amdgpu_firmware_header *) - adev->sdma.instance[1].fw->data; + adev->sdma.instance[1].fw->data; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 1e8e1123ddf416f18176cbc6e82fa791b3df9fb5..ce443586a0c71c13bd36472558473c0beae9b081 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1686,7 +1686,7 @@ void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock) { uint32_t bios_6_scratch; - bios_6_scratch = RREG32(mmBIOS_SCRATCH_6); + bios_6_scratch = RREG32(adev->bios_scratch_reg_offset + 6); if (lock) { bios_6_scratch |= ATOM_S6_CRITICAL_STATE; @@ -1696,15 +1696,17 @@ void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock) bios_6_scratch |= ATOM_S6_ACC_MODE; } - WREG32(mmBIOS_SCRATCH_6, bios_6_scratch); + WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch); } void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev) { uint32_t bios_2_scratch, bios_6_scratch; - bios_2_scratch = RREG32(mmBIOS_SCRATCH_2); - bios_6_scratch = RREG32(mmBIOS_SCRATCH_6); + adev->bios_scratch_reg_offset = mmBIOS_SCRATCH_0; + + bios_2_scratch = RREG32(adev->bios_scratch_reg_offset + 2); + bios_6_scratch = RREG32(adev->bios_scratch_reg_offset + 6); /* let the bios control the backlight */ bios_2_scratch &= ~ATOM_S2_VRI_BRIGHT_ENABLE; @@ -1715,8 +1717,8 @@ void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev) /* clear the vbios dpms state */ bios_2_scratch &= ~ATOM_S2_DEVICE_DPMS_STATE; - WREG32(mmBIOS_SCRATCH_2, bios_2_scratch); - WREG32(mmBIOS_SCRATCH_6, bios_6_scratch); + WREG32(adev->bios_scratch_reg_offset + 2, bios_2_scratch); + WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch); } void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev) @@ -1724,7 +1726,7 @@ void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev) int i; for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) - adev->bios_scratch[i] = RREG32(mmBIOS_SCRATCH_0 + i); + adev->bios_scratch[i] = RREG32(adev->bios_scratch_reg_offset + i); } void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev) @@ -1738,20 +1740,30 @@ void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev) adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK; for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) - WREG32(mmBIOS_SCRATCH_0 + i, adev->bios_scratch[i]); + WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]); } void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev, bool hung) { - u32 tmp = RREG32(mmBIOS_SCRATCH_3); + u32 tmp = RREG32(adev->bios_scratch_reg_offset + 3); if (hung) tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG; else tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG; - WREG32(mmBIOS_SCRATCH_3, tmp); + WREG32(adev->bios_scratch_reg_offset + 3, tmp); +} + +bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev) +{ + u32 tmp = RREG32(adev->bios_scratch_reg_offset + 7); + + if (tmp & ATOM_S7_ASIC_INIT_COMPLETE_MASK) + return false; + else + return true; } /* Atom needs data in little endian format diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index 38d0fe32e5cd4218fad2244597930c90465cc7e9..b0d5d1d7fdba15d6674fdfbc744c90592cee8c8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -200,6 +200,7 @@ void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev); void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev); void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev, bool hung); +bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev); void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le); int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 4bdda56fcceea36b8229d74ac3b76fe706919a77..f9ffe8ef0cd60a85ae4180f66727f791e8679622 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -66,41 +66,6 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev) } } -void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) - adev->bios_scratch[i] = RREG32(adev->bios_scratch_reg_offset + i); -} - -void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev) -{ - int i; - - /* - * VBIOS will check ASIC_INIT_COMPLETE bit to decide if - * execute ASIC_Init posting via driver - */ - adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK; - - for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) - WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]); -} - -void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev, - bool hung) -{ - u32 tmp = RREG32(adev->bios_scratch_reg_offset + 3); - - if (hung) - tmp |= ATOM_S3_ASIC_GUI_ENGINE_HUNG; - else - tmp &= ~ATOM_S3_ASIC_GUI_ENGINE_HUNG; - - WREG32(adev->bios_scratch_reg_offset + 3, tmp); -} - int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) { struct atom_context *ctx = adev->mode_info.atom_context; @@ -130,3 +95,129 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) ctx->scratch_size_bytes = usage_bytes; return 0; } + +union igp_info { + struct atom_integrated_system_info_v1_11 v11; +}; + +/* + * Return vram width from integrated system info table, if available, + * or 0 if not. + */ +int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + integratedsysteminfo); + u16 data_offset, size; + union igp_info *igp_info; + u8 frev, crev; + + /* get any igp specific overrides */ + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, + &frev, &crev, &data_offset)) { + igp_info = (union igp_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 11: + return igp_info->v11.umachannelnumber * 64; + default: + return 0; + } + } + + return 0; +} + +union firmware_info { + struct atom_firmware_info_v3_1 v31; +}; + +union smu_info { + struct atom_smu_info_v3_1 v31; +}; + +union umc_info { + struct atom_umc_info_v3_1 v31; +}; + +int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + struct amdgpu_pll *spll = &adev->clock.spll; + struct amdgpu_pll *mpll = &adev->clock.mpll; + uint8_t frev, crev; + uint16_t data_offset; + int ret = -EINVAL, index; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + union firmware_info *firmware_info = + (union firmware_info *)(mode_info->atom_context->bios + + data_offset); + + adev->clock.default_sclk = + le32_to_cpu(firmware_info->v31.bootup_sclk_in10khz); + adev->clock.default_mclk = + le32_to_cpu(firmware_info->v31.bootup_mclk_in10khz); + + adev->pm.current_sclk = adev->clock.default_sclk; + adev->pm.current_mclk = adev->clock.default_mclk; + + /* not technically a clock, but... */ + adev->mode_info.firmware_flags = + le32_to_cpu(firmware_info->v31.firmware_capability); + + ret = 0; + } + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + smu_info); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + union smu_info *smu_info = + (union smu_info *)(mode_info->atom_context->bios + + data_offset); + + /* system clock */ + spll->reference_freq = le32_to_cpu(smu_info->v31.core_refclk_10khz); + + spll->reference_div = 0; + spll->min_post_div = 1; + spll->max_post_div = 1; + spll->min_ref_div = 2; + spll->max_ref_div = 0xff; + spll->min_feedback_div = 4; + spll->max_feedback_div = 0xff; + spll->best_vco = 0; + + ret = 0; + } + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + umc_info); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + union umc_info *umc_info = + (union umc_info *)(mode_info->atom_context->bios + + data_offset); + + /* memory clock */ + mpll->reference_freq = le32_to_cpu(umc_info->v31.mem_refclk_10khz); + + mpll->reference_div = 0; + mpll->min_post_div = 1; + mpll->max_post_div = 1; + mpll->min_ref_div = 2; + mpll->max_ref_div = 0xff; + mpll->min_feedback_div = 4; + mpll->max_feedback_div = 0xff; + mpll->best_vco = 0; + + ret = 0; + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index a2c3ebe22c713aeacff5036d431ffc1e157f5f7e..288b97e543478b48f2298cf3e442b03f99a91d69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -26,10 +26,8 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev); void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); -void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev); -void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev); -void amdgpu_atomfirmware_scratch_regs_engine_hung(struct amdgpu_device *adev, - bool hung); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 1beae5b930d0e10d407501708a0585e56001181e..63ec1e1bb6aa17dff2ad786f2aae9600d607a38a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -40,7 +40,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, for (i = 0; i < n; i++) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence, - false); + false, false); if (r) goto exit_do_move; r = dma_fence_wait(fence, false); @@ -81,7 +81,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, n = AMDGPU_BENCHMARK_ITERATIONS; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, - NULL, &sobj); + NULL, 0, &sobj); if (r) { goto out_cleanup; } @@ -94,7 +94,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, goto out_cleanup; } r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, - NULL, &dobj); + NULL, 0, &dobj); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 365e735f664744042cec72b939a06078444a69ae..c21adf60a7f200ba6a4faaa32c163157d00da9ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -86,19 +86,6 @@ static bool check_atom_bios(uint8_t *bios, size_t size) return false; } -static bool is_atom_fw(uint8_t *bios) -{ - uint16_t bios_header_start = bios[0x48] | (bios[0x49] << 8); - uint8_t frev = bios[bios_header_start + 2]; - uint8_t crev = bios[bios_header_start + 3]; - - if ((frev < 3) || - ((frev == 3) && (crev < 3))) - return false; - - return true; -} - /* If you boot an IGP board with a discrete card as the primary, * the IGP rom is not accessible via the rom bar as the IGP rom is * part of the system bios. On boot, the system bios puts a @@ -117,7 +104,7 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev) adev->bios = NULL; vram_base = pci_resource_start(adev->pdev, 0); - bios = ioremap(vram_base, size); + bios = ioremap_wc(vram_base, size); if (!bios) { return false; } @@ -455,6 +442,6 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) return false; success: - adev->is_atom_fw = is_atom_fw(adev->bios); + adev->is_atom_fw = (adev->asic_type >= CHIP_VEGA10) ? true : false; return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 5e771bc11b0054aa667bb75291da4cb9911521df..59089e027f4d8ac386f1174fe47eea5622ec182d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -83,7 +83,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev, r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); mutex_unlock(&fpriv->bo_list_lock); if (r < 0) { - kfree(list); + amdgpu_bo_list_free(list); return r; } *id = r; @@ -136,7 +136,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, } bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); if (usermm) { @@ -156,11 +156,11 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, entry->tv.bo = &entry->robj->tbo; entry->tv.shared = !entry->robj->prime_shared_count; - if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS) + if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) gds_obj = entry->robj; - if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GWS) + if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS) gws_obj = entry->robj; - if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA) + if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA) oa_obj = entry->robj; total_size += amdgpu_bo_size(entry->robj); @@ -270,7 +270,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_bo_list *args = data; uint32_t handle = args->in.list_handle; - const void __user *uptr = (const void*)(uintptr_t)args->in.bo_info_ptr; + const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr); struct drm_amdgpu_bo_list_entry *info; struct amdgpu_bo_list *list; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index c0a806280257c80954930c3744d7cd65e6b6de5f..fd435a96481c2f1c019bd4d2303ebbb9b590adad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -124,7 +124,7 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE, true, domain, flags, NULL, &placement, NULL, - &obj); + 0, &obj); if (ret) { DRM_ERROR("(%d) bo create failed\n", ret); return ret; @@ -166,7 +166,7 @@ static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t h r = amdgpu_bo_reserve(obj, true); if (unlikely(r != 0)) return r; - r = amdgpu_bo_pin_restricted(obj, obj->prefered_domains, + r = amdgpu_bo_pin_restricted(obj, obj->preferred_domains, min_offset, max_offset, mcaddr); amdgpu_bo_unreserve(obj); return r; @@ -240,6 +240,8 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device, return RREG32_DIDT(index); case CGS_IND_REG_GC_CAC: return RREG32_GC_CAC(index); + case CGS_IND_REG_SE_CAC: + return RREG32_SE_CAC(index); case CGS_IND_REG__AUDIO_ENDPT: DRM_ERROR("audio endpt register access not implemented.\n"); return 0; @@ -266,6 +268,8 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, return WREG32_DIDT(index, value); case CGS_IND_REG_GC_CAC: return WREG32_GC_CAC(index, value); + case CGS_IND_REG_SE_CAC: + return WREG32_SE_CAC(index, value); case CGS_IND_REG__AUDIO_ENDPT: DRM_ERROR("audio endpt register access not implemented.\n"); return; @@ -610,6 +614,17 @@ static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device, return 0; } +static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device, + bool lock) +{ + CGS_FUNC_ADEV; + + if (lock) + mutex_lock(&adev->grbm_idx_mutex); + else + mutex_unlock(&adev->grbm_idx_mutex); +} + static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info) @@ -644,7 +659,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, info->version = (uint16_t)le32_to_cpu(header->header.ucode_version); if (CGS_UCODE_ID_CP_MEC == type) - info->image_size = (header->jt_offset) << 2; + info->image_size = le32_to_cpu(header->jt_offset) << 2; info->fw_version = amdgpu_get_firmware_version(cgs_device, type); info->feature_version = (uint16_t)le32_to_cpu(header->ucode_feature_version); @@ -719,7 +734,13 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, strcpy(fw_name, "amdgpu/polaris12_smc.bin"); break; case CHIP_VEGA10: - strcpy(fw_name, "amdgpu/vega10_smc.bin"); + if ((adev->pdev->device == 0x687f) && + ((adev->pdev->revision == 0xc0) || + (adev->pdev->revision == 0xc1) || + (adev->pdev->revision == 0xc3))) + strcpy(fw_name, "amdgpu/vega10_acg_smc.bin"); + else + strcpy(fw_name, "amdgpu/vega10_smc.bin"); break; default: DRM_ERROR("SMC firmware not supported\n"); @@ -1117,6 +1138,7 @@ static const struct cgs_ops amdgpu_cgs_ops = { .query_system_info = amdgpu_cgs_query_system_info, .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled, .enter_safe_mode = amdgpu_cgs_enter_safe_mode, + .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx, }; static const struct cgs_os_ops amdgpu_cgs_os_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5599c01b265d40c105b59b5ca0f3f14c2efdf40e..269b835571eb07f31093574fb86f662e93eb4c2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -54,7 +54,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, *offset = data->offset; - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) { amdgpu_bo_unref(&p->uf_entry.robj); @@ -90,7 +90,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } /* get chunks */ - chunk_array_user = (uint64_t __user *)(uintptr_t)(cs->in.chunks); + chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { ret = -EFAULT; @@ -110,7 +110,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) struct drm_amdgpu_cs_chunk user_chunk; uint32_t __user *cdata; - chunk_ptr = (void __user *)(uintptr_t)chunk_array[i]; + chunk_ptr = u64_to_user_ptr(chunk_array[i]); if (copy_from_user(&user_chunk, chunk_ptr, sizeof(struct drm_amdgpu_cs_chunk))) { ret = -EFAULT; @@ -121,7 +121,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) p->chunks[i].length_dw = user_chunk.length_dw; size = p->chunks[i].length_dw; - cdata = (void __user *)(uintptr_t)user_chunk.chunk_data; + cdata = u64_to_user_ptr(user_chunk.chunk_data); p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); if (p->chunks[i].kdata == NULL) { @@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes) * ticks. The accumulated microseconds (us) are converted to bytes and * returned. */ -static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) +static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, + u64 *max_bytes, + u64 *max_vis_bytes) { s64 time_us, increment_us; - u64 max_bytes; u64 free_vram, total_vram, used_vram; /* Allow a maximum of 200 accumulated ms. This is basically per-IB @@ -238,11 +239,14 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) */ const s64 us_upper_bound = 200000; - if (!adev->mm_stats.log2_max_MBps) - return 0; + if (!adev->mm_stats.log2_max_MBps) { + *max_bytes = 0; + *max_vis_bytes = 0; + return; + } total_vram = adev->mc.real_vram_size - adev->vram_pin_size; - used_vram = atomic64_read(&adev->vram_usage); + used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; spin_lock(&adev->mm_stats.lock); @@ -280,23 +284,46 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us); } - /* This returns 0 if the driver is in debt to disallow (optional) + /* This is set to 0 if the driver is in debt to disallow (optional) * buffer moves. */ - max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); + *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); + + /* Do the same for visible VRAM if half of it is free */ + if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { + u64 total_vis_vram = adev->mc.visible_vram_size; + u64 used_vis_vram = + amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); + + if (used_vis_vram < total_vis_vram) { + u64 free_vis_vram = total_vis_vram - used_vis_vram; + adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis + + increment_us, us_upper_bound); + + if (free_vis_vram >= total_vis_vram / 2) + adev->mm_stats.accum_us_vis = + max(bytes_to_us(adev, free_vis_vram / 2), + adev->mm_stats.accum_us_vis); + } + + *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis); + } else { + *max_vis_bytes = 0; + } spin_unlock(&adev->mm_stats.lock); - return max_bytes; } /* Report how many bytes have really been moved for the last command * submission. This can result in a debt that can stop buffer migrations * temporarily. */ -void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes) +void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, + u64 num_vis_bytes) { spin_lock(&adev->mm_stats.lock); adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes); + adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes); spin_unlock(&adev->mm_stats.lock); } @@ -304,7 +331,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; uint32_t domain; int r; @@ -314,17 +341,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, /* Don't move this buffer if we have depleted our allowance * to move it. Don't move anything if the threshold is zero. */ - if (p->bytes_moved < p->bytes_moved_threshold) - domain = bo->prefered_domains; - else + if (p->bytes_moved < p->bytes_moved_threshold) { + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { + /* And don't move a CPU_ACCESS_REQUIRED BO to limited + * visible VRAM if we've depleted our allowance to do + * that. + */ + if (p->bytes_moved_vis < p->bytes_moved_vis_threshold) + domain = bo->preferred_domains; + else + domain = bo->allowed_domains; + } else { + domain = bo->preferred_domains; + } + } else { domain = bo->allowed_domains; + } retry: amdgpu_ttm_placement_from_domain(bo, domain); initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - p->bytes_moved += atomic64_read(&adev->num_bytes_moved) - - initial_bytes_moved; + bytes_moved = atomic64_read(&adev->num_bytes_moved) - + initial_bytes_moved; + p->bytes_moved += bytes_moved; + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) + p->bytes_moved_vis += bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { domain = bo->allowed_domains; @@ -350,7 +395,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *candidate = p->evictable; struct amdgpu_bo *bo = candidate->robj; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; + bool update_bytes_moved_vis; uint32_t other; /* If we reached our current BO we can forget it */ @@ -370,10 +416,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, /* Good we can try to move this BO somewhere else */ amdgpu_ttm_placement_from_domain(bo, other); + update_bytes_moved_vis = + adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT; initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - p->bytes_moved += atomic64_read(&adev->num_bytes_moved) - + bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; + p->bytes_moved += bytes_moved; + if (update_bytes_moved_vis) + p->bytes_moved_vis += bytes_moved; if (unlikely(r)) break; @@ -554,8 +607,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, list_splice(&need_pages, &p->validated); } - p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev); + amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, + &p->bytes_moved_vis_threshold); p->bytes_moved = 0; + p->bytes_moved_vis = 0; p->evictable = list_last_entry(&p->validated, struct amdgpu_bo_list_entry, tv.head); @@ -579,8 +634,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto error_validate; } - amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved); - + amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, + p->bytes_moved_vis); fpriv->vm.last_eviction_counter = atomic64_read(&p->adev->num_evictions); @@ -619,10 +674,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } error_validate: - if (r) { - amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm); + if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - } error_free_pages: @@ -670,21 +723,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) * If error is set than unvalidate buffer, otherwise just free memory * used by parsing context. **/ -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, + bool backoff) { - struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; unsigned i; - if (!error) { - amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm); - + if (!error) ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, parser->fence); - } else if (backoff) { + else if (backoff) ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); - } for (i = 0; i < parser->num_post_dep_syncobjs; i++) drm_syncobj_put(parser->post_dep_syncobjs[i]); @@ -737,7 +787,8 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) if (amdgpu_sriov_vf(adev)) { struct dma_fence *f; - bo_va = vm->csa_bo_va; + + bo_va = fpriv->csa_va; BUG_ON(!bo_va); r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) @@ -774,7 +825,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) } - r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync); + r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync); if (amdgpu_vm_debug && p->bo_list) { /* Invalidate all BOs to test for userspace bugs */ @@ -984,7 +1035,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, { int r; struct dma_fence *fence; - r = drm_syncobj_fence_get(p->filp, handle, &fence); + r = drm_syncobj_find_fence(p->filp, handle, &fence); if (r) return r; @@ -1383,7 +1434,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, if (fences == NULL) return -ENOMEM; - fences_user = (void __user *)(uintptr_t)(wait->in.fences); + fences_user = u64_to_user_ptr(wait->in.fences); if (copy_from_user(fences, fences_user, sizeof(struct drm_amdgpu_fence) * fence_count)) { r = -EFAULT; @@ -1436,7 +1487,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, addr > mapping->last) continue; - *bo = lobj->bo_va->bo; + *bo = lobj->bo_va->base.bo; return mapping; } @@ -1445,7 +1496,7 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, addr > mapping->last) continue; - *bo = lobj->bo_va->bo; + *bo = lobj->bo_va->base.bo; return mapping; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4a8fc15467cf8c40ed268f190a01acd4f506fed0..1a459ac63df453b2cab796b8ed28fd4afff59522 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -53,6 +53,9 @@ #include "bif/bif_4_1_d.h" #include #include +#include "amdgpu_vf_error.h" + +#include "amdgpu_amdkfd.h" MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); @@ -128,6 +131,10 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, { trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); + if (adev->asic_type >= CHIP_VEGA10 && reg == 0) { + adev->last_mm_index = v; + } + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { BUG_ON(in_interrupt()); return amdgpu_virt_kiq_wreg(adev, reg, v); @@ -143,6 +150,10 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4)); spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); } + + if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) { + udelay(500); + } } u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg) @@ -157,6 +168,9 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg) void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { + if (adev->asic_type >= CHIP_VEGA10 && reg == 0) { + adev->last_mm_index = v; + } if ((reg * 4) < adev->rio_mem_size) iowrite32(v, adev->rio_mem + (reg * 4)); @@ -164,6 +178,10 @@ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v) iowrite32((reg * 4), adev->rio_mem + (mmMM_INDEX * 4)); iowrite32(v, adev->rio_mem + (mmMM_DATA * 4)); } + + if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) { + udelay(500); + } } /** @@ -318,51 +336,16 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, static int amdgpu_vram_scratch_init(struct amdgpu_device *adev) { - int r; - - if (adev->vram_scratch.robj == NULL) { - r = amdgpu_bo_create(adev, AMDGPU_GPU_PAGE_SIZE, - PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &adev->vram_scratch.robj); - if (r) { - return r; - } - } - - r = amdgpu_bo_reserve(adev->vram_scratch.robj, false); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_pin(adev->vram_scratch.robj, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vram_scratch.gpu_addr); - if (r) { - amdgpu_bo_unreserve(adev->vram_scratch.robj); - return r; - } - r = amdgpu_bo_kmap(adev->vram_scratch.robj, - (void **)&adev->vram_scratch.ptr); - if (r) - amdgpu_bo_unpin(adev->vram_scratch.robj); - amdgpu_bo_unreserve(adev->vram_scratch.robj); - - return r; + return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &adev->vram_scratch.robj, + &adev->vram_scratch.gpu_addr, + (void **)&adev->vram_scratch.ptr); } static void amdgpu_vram_scratch_fini(struct amdgpu_device *adev) { - int r; - - if (adev->vram_scratch.robj == NULL) { - return; - } - r = amdgpu_bo_reserve(adev->vram_scratch.robj, true); - if (likely(r == 0)) { - amdgpu_bo_kunmap(adev->vram_scratch.robj); - amdgpu_bo_unpin(adev->vram_scratch.robj); - amdgpu_bo_unreserve(adev->vram_scratch.robj); - } - amdgpu_bo_unref(&adev->vram_scratch.robj); + amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL); } /** @@ -521,7 +504,8 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) int r; if (adev->wb.wb_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t), + /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */ + r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->wb.wb_obj, &adev->wb.gpu_addr, (void **)&adev->wb.wb); @@ -552,32 +536,10 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb) { unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); - if (offset < adev->wb.num_wb) { - __set_bit(offset, adev->wb.used); - *wb = offset; - return 0; - } else { - return -EINVAL; - } -} -/** - * amdgpu_wb_get_64bit - Allocate a wb entry - * - * @adev: amdgpu_device pointer - * @wb: wb index - * - * Allocate a wb slot for use by the driver (all asics). - * Returns 0 on success or -EINVAL on failure. - */ -int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb) -{ - unsigned long offset = bitmap_find_next_zero_area_off(adev->wb.used, - adev->wb.num_wb, 0, 2, 7, 0); - if ((offset + 1) < adev->wb.num_wb) { + if (offset < adev->wb.num_wb) { __set_bit(offset, adev->wb.used); - __set_bit(offset + 1, adev->wb.used); - *wb = offset; + *wb = offset * 8; /* convert to dw offset */ return 0; } else { return -EINVAL; @@ -598,22 +560,6 @@ void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb) __clear_bit(wb, adev->wb.used); } -/** - * amdgpu_wb_free_64bit - Free a wb entry - * - * @adev: amdgpu_device pointer - * @wb: wb index - * - * Free a wb slot allocated for use by the driver (all asics) - */ -void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb) -{ - if ((wb + 1) < adev->wb.num_wb) { - __clear_bit(wb, adev->wb.used); - __clear_bit(wb + 1, adev->wb.used); - } -} - /** * amdgpu_vram_location - try to find VRAM location * @adev: amdgpu device structure holding all necessary informations @@ -665,7 +611,7 @@ void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 } /** - * amdgpu_gtt_location - try to find GTT location + * amdgpu_gart_location - try to find GTT location * @adev: amdgpu device structure holding all necessary informations * @mc: memory controller structure holding memory informations * @@ -676,28 +622,28 @@ void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 * * FIXME: when reducing GTT size align new size on power of 2. */ -void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) +void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) { u64 size_af, size_bf; - size_af = ((adev->mc.mc_mask - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align; - size_bf = mc->vram_start & ~mc->gtt_base_align; + size_af = adev->mc.mc_mask - mc->vram_end; + size_bf = mc->vram_start; if (size_bf > size_af) { - if (mc->gtt_size > size_bf) { + if (mc->gart_size > size_bf) { dev_warn(adev->dev, "limiting GTT\n"); - mc->gtt_size = size_bf; + mc->gart_size = size_bf; } - mc->gtt_start = 0; + mc->gart_start = 0; } else { - if (mc->gtt_size > size_af) { + if (mc->gart_size > size_af) { dev_warn(adev->dev, "limiting GTT\n"); - mc->gtt_size = size_af; + mc->gart_size = size_af; } - mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align; + mc->gart_start = mc->vram_end + 1; } - mc->gtt_end = mc->gtt_start + mc->gtt_size - 1; + mc->gart_end = mc->gart_start + mc->gart_size - 1; dev_info(adev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n", - mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end); + mc->gart_size >> 20, mc->gart_start, mc->gart_end); } /* @@ -720,7 +666,12 @@ bool amdgpu_need_post(struct amdgpu_device *adev) adev->has_hw_reset = false; return true; } - /* then check MEM_SIZE, in case the crtcs are off */ + + /* bios scratch used on CIK+ */ + if (adev->asic_type >= CHIP_BONAIRE) + return amdgpu_atombios_scratch_need_asic_init(adev); + + /* check MEM_SIZE for older asics */ reg = amdgpu_asic_get_config_memsize(adev); if ((reg != 0) && (reg != 0xffffffff)) @@ -1031,19 +982,6 @@ static unsigned int amdgpu_vga_set_decode(void *cookie, bool state) return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; } -/** - * amdgpu_check_pot_argument - check that argument is a power of two - * - * @arg: value to check - * - * Validates that a certain argument is a power of two (all asics). - * Returns true if argument is valid. - */ -static bool amdgpu_check_pot_argument(int arg) -{ - return (arg & (arg - 1)) == 0; -} - static void amdgpu_check_block_size(struct amdgpu_device *adev) { /* defines number of bits in page table versus page directory, @@ -1077,7 +1015,7 @@ static void amdgpu_check_vm_size(struct amdgpu_device *adev) if (amdgpu_vm_size == -1) return; - if (!amdgpu_check_pot_argument(amdgpu_vm_size)) { + if (!is_power_of_2(amdgpu_vm_size)) { dev_warn(adev->dev, "VM size (%d) must be a power of 2\n", amdgpu_vm_size); goto def_value; @@ -1118,19 +1056,31 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", amdgpu_sched_jobs); amdgpu_sched_jobs = 4; - } else if (!amdgpu_check_pot_argument(amdgpu_sched_jobs)){ + } else if (!is_power_of_2(amdgpu_sched_jobs)){ dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n", amdgpu_sched_jobs); amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); } - if (amdgpu_gart_size != -1) { + if (amdgpu_gart_size < 32) { + /* gart size must be greater or equal to 32M */ + dev_warn(adev->dev, "gart size (%d) too small\n", + amdgpu_gart_size); + amdgpu_gart_size = 32; + } + + if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) { /* gtt size must be greater or equal to 32M */ - if (amdgpu_gart_size < 32) { - dev_warn(adev->dev, "gart size (%d) too small\n", - amdgpu_gart_size); - amdgpu_gart_size = -1; - } + dev_warn(adev->dev, "gtt size (%d) too small\n", + amdgpu_gtt_size); + amdgpu_gtt_size = -1; + } + + /* valid range is between 4 and 9 inclusive */ + if (amdgpu_vm_fragment_size != -1 && + (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) { + dev_warn(adev->dev, "valid range is between 4 and 9\n"); + amdgpu_vm_fragment_size = -1; } amdgpu_check_vm_size(adev); @@ -1138,7 +1088,7 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) amdgpu_check_block_size(adev); if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 || - !amdgpu_check_pot_argument(amdgpu_vram_page_split))) { + !is_power_of_2(amdgpu_vram_page_split))) { dev_warn(adev->dev, "invalid VRAM page split (%d)\n", amdgpu_vram_page_split); amdgpu_vram_page_split = 1024; @@ -1901,7 +1851,8 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_DCE, AMD_IP_BLOCK_TYPE_GFX, AMD_IP_BLOCK_TYPE_SDMA, - AMD_IP_BLOCK_TYPE_VCE, + AMD_IP_BLOCK_TYPE_UVD, + AMD_IP_BLOCK_TYPE_VCE }; for (i = 0; i < ARRAY_SIZE(ip_order); i++) { @@ -2019,7 +1970,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->flags = flags; adev->asic_type = flags & AMD_ASIC_MASK; adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; - adev->mc.gtt_size = 512 * 1024 * 1024; + adev->mc.gart_size = 512 * 1024 * 1024; adev->accel_working = false; adev->num_rings = 0; adev->mman.buffer_funcs = NULL; @@ -2068,6 +2019,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->uvd_ctx_idx_lock); spin_lock_init(&adev->didt_idx_lock); spin_lock_init(&adev->gc_cac_idx_lock); + spin_lock_init(&adev->se_cac_idx_lock); spin_lock_init(&adev->audio_endpt_idx_lock); spin_lock_init(&adev->mm_stats.lock); @@ -2143,6 +2095,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_atombios_init(adev); if (r) { dev_err(adev->dev, "amdgpu_atombios_init failed\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); goto failed; } @@ -2153,6 +2106,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (amdgpu_vpost_needed(adev)) { if (!adev->bios) { dev_err(adev->dev, "no vBIOS found\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); r = -EINVAL; goto failed; } @@ -2160,18 +2114,28 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_atom_asic_init(adev->mode_info.atom_context); if (r) { dev_err(adev->dev, "gpu post error!\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_POST_ERROR, 0, 0); goto failed; } } else { DRM_INFO("GPU post is not needed\n"); } - if (!adev->is_atom_fw) { + if (adev->is_atom_fw) { + /* Initialize clocks */ + r = amdgpu_atomfirmware_get_clock_info(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); + goto failed; + } + } else { /* Initialize clocks */ r = amdgpu_atombios_get_clock_info(adev); if (r) { dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); - return r; + amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); + goto failed; } /* init i2c buses */ amdgpu_atombios_i2c_init(adev); @@ -2181,6 +2145,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_fence_driver_init(adev); if (r) { dev_err(adev->dev, "amdgpu_fence_driver_init failed\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); goto failed; } @@ -2190,6 +2155,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_init(adev); if (r) { dev_err(adev->dev, "amdgpu_init failed\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); amdgpu_fini(adev); goto failed; } @@ -2209,6 +2175,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_ib_pool_init(adev); if (r) { dev_err(adev->dev, "IB initialization failed (%d).\n", r); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r); goto failed; } @@ -2253,12 +2220,14 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_late_init(adev); if (r) { dev_err(adev->dev, "amdgpu_late_init failed\n"); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); goto failed; } return 0; failed: + amdgpu_vf_error_trans_all(adev); if (runtime) vga_switcheroo_fini_domain_pm_ops(adev->dev); return r; @@ -2351,6 +2320,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) } drm_modeset_unlock_all(dev); + amdgpu_amdkfd_suspend(adev); + /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); @@ -2392,10 +2363,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) */ amdgpu_bo_evict_vram(adev); - if (adev->is_atom_fw) - amdgpu_atomfirmware_scratch_regs_save(adev); - else - amdgpu_atombios_scratch_regs_save(adev); + amdgpu_atombios_scratch_regs_save(adev); pci_save_state(dev->pdev); if (suspend) { /* Shut down the device */ @@ -2444,10 +2412,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) if (r) goto unlock; } - if (adev->is_atom_fw) - amdgpu_atomfirmware_scratch_regs_restore(adev); - else - amdgpu_atombios_scratch_regs_restore(adev); + amdgpu_atombios_scratch_regs_restore(adev); /* post card */ if (amdgpu_need_post(adev)) { @@ -2490,6 +2455,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } } } + r = amdgpu_amdkfd_resume(adev); + if (r) + return r; /* blat the mode back in */ if (fbcon) { @@ -2860,21 +2828,9 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) r = amdgpu_suspend(adev); retry: - /* Disable fb access */ - if (adev->mode_info.num_crtc) { - struct amdgpu_mode_mc_save save; - amdgpu_display_stop_mc_access(adev, &save); - amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC); - } - if (adev->is_atom_fw) - amdgpu_atomfirmware_scratch_regs_save(adev); - else - amdgpu_atombios_scratch_regs_save(adev); + amdgpu_atombios_scratch_regs_save(adev); r = amdgpu_asic_reset(adev); - if (adev->is_atom_fw) - amdgpu_atomfirmware_scratch_regs_restore(adev); - else - amdgpu_atombios_scratch_regs_restore(adev); + amdgpu_atombios_scratch_regs_restore(adev); /* post card */ amdgpu_atom_asic_init(adev->mode_info.atom_context); @@ -2952,6 +2908,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) } } else { dev_err(adev->dev, "asic resume failed (%d).\n", r); + amdgpu_vf_error_put(AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0, r); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { if (adev->rings[i] && adev->rings[i]->sched.thread) { kthread_unpark(adev->rings[i]->sched.thread); @@ -2962,12 +2919,16 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) drm_helper_resume_force_mode(adev->ddev); ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); - if (r) + if (r) { /* bad news, how to tell it to userspace ? */ dev_info(adev->dev, "GPU reset failed\n"); - else + amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); + } + else { dev_info(adev->dev, "GPU reset successed!\n"); + } + amdgpu_vf_error_trans_all(adev); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index cdf2ab20166a2f1b86beb7a2d84e7d68cb2b5d6c..6ad243293a78b163f0c0269ad2eba78623a60811 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -482,7 +482,7 @@ static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); - drm_gem_object_unreference_unlocked(amdgpu_fb->obj); + drm_gem_object_put_unlocked(amdgpu_fb->obj); drm_framebuffer_cleanup(fb); kfree(amdgpu_fb); } @@ -542,14 +542,14 @@ amdgpu_user_framebuffer_create(struct drm_device *dev, amdgpu_fb = kzalloc(sizeof(*amdgpu_fb), GFP_KERNEL); if (amdgpu_fb == NULL) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ERR_PTR(-ENOMEM); } ret = amdgpu_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj); if (ret) { kfree(amdgpu_fb); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b59f37c83fa633ac057251ae4a57e8b2e9dded7e..e39ec981b11c85d11ca6250caf2c4ab7de35fd97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -68,13 +68,16 @@ * - 3.16.0 - Add reserved vmid support * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS. * - 3.18.0 - Export gpu always on cu bitmap + * - 3.19.0 - Add support for UVD MJPEG decode */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 18 +#define KMS_DRIVER_MINOR 19 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; -int amdgpu_gart_size = -1; /* auto */ +int amdgpu_vis_vram_limit = 0; +unsigned amdgpu_gart_size = 256; +int amdgpu_gtt_size = -1; /* auto */ int amdgpu_moverate = -1; /* auto */ int amdgpu_benchmarking = 0; int amdgpu_testing = 0; @@ -92,6 +95,7 @@ unsigned amdgpu_ip_block_mask = 0xffffffff; int amdgpu_bapm = -1; int amdgpu_deep_color = 0; int amdgpu_vm_size = -1; +int amdgpu_vm_fragment_size = -1; int amdgpu_vm_block_size = -1; int amdgpu_vm_fault_stop = 0; int amdgpu_vm_debug = 0; @@ -106,6 +110,7 @@ unsigned amdgpu_pcie_gen_cap = 0; unsigned amdgpu_pcie_lane_cap = 0; unsigned amdgpu_cg_mask = 0xffffffff; unsigned amdgpu_pg_mask = 0xffffffff; +unsigned amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; unsigned amdgpu_pp_feature_mask = 0xffffffff; @@ -120,8 +125,14 @@ int amdgpu_lbpw = -1; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); -MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc., -1 = auto)"); -module_param_named(gartsize, amdgpu_gart_size, int, 0600); +MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes"); +module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444); + +MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc.)"); +module_param_named(gartsize, amdgpu_gart_size, uint, 0600); + +MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)"); +module_param_named(gttsize, amdgpu_gtt_size, int, 0600); MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto, 0=1=disabled)"); module_param_named(moverate, amdgpu_moverate, int, 0600); @@ -174,6 +185,9 @@ module_param_named(deep_color, amdgpu_deep_color, int, 0444); MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)"); module_param_named(vm_size, amdgpu_vm_size, int, 0444); +MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)"); +module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444); + MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)"); module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444); @@ -186,7 +200,7 @@ module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both"); module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444); -MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024, -1 = disable)"); +MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)"); module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444); MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); @@ -199,7 +213,7 @@ MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))"); -module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, int, 0444); +module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444); MODULE_PARM_DESC(no_evict, "Support pinning request from user space (1 = enable, 0 = disable (default))"); module_param_named(no_evict, amdgpu_no_evict, int, 0444); @@ -219,6 +233,9 @@ module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444); MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)"); module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444); +MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))"); +module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444); + MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)"); module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444); @@ -803,7 +820,6 @@ static struct drm_driver kms_driver = { .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, .lastclose = amdgpu_driver_lastclose_kms, - .set_busid = drm_pci_set_busid, .unload = amdgpu_driver_unload_kms, .get_vblank_counter = amdgpu_get_vblank_counter_kms, .enable_vblank = amdgpu_enable_vblank_kms, @@ -823,7 +839,6 @@ static struct drm_driver kms_driver = { .gem_close_object = amdgpu_gem_object_close, .dumb_create = amdgpu_mode_dumb_create, .dumb_map_offset = amdgpu_mode_dumb_mmap, - .dumb_destroy = drm_gem_dumb_destroy, .fops = &amdgpu_driver_kms_fops, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index c0d8c6ff6380e8a69de8faf58d28963dde29c8de..9afa9c097e1f1012d93fa2fcd89e9fb5c8d3cffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -118,7 +118,7 @@ static void amdgpufb_destroy_pinned_object(struct drm_gem_object *gobj) amdgpu_bo_unpin(abo); amdgpu_bo_unreserve(abo); } - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); } static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, @@ -245,13 +245,12 @@ static int amdgpufb_create(struct drm_fb_helper *helper, drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth); - info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT; info->fbops = &amdgpufb_ops; tmp = amdgpu_bo_gpu_offset(abo) - adev->mc.vram_start; info->fix.smem_start = adev->mc.aper_base + tmp; info->fix.smem_len = amdgpu_bo_size(abo); - info->screen_base = abo->kptr; + info->screen_base = amdgpu_bo_kptr(abo); info->screen_size = amdgpu_bo_size(abo); drm_fb_helper_fill_var(info, &rfbdev->helper, sizes->fb_width, sizes->fb_height); @@ -281,7 +280,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, } if (fb && ret) { - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); drm_framebuffer_unregister_private(fb); drm_framebuffer_cleanup(fb); kfree(fb); @@ -312,31 +311,7 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb return 0; } -/** Sets the color ramps on behalf of fbcon */ -static void amdgpu_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green, - u16 blue, int regno) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - amdgpu_crtc->lut_r[regno] = red >> 6; - amdgpu_crtc->lut_g[regno] = green >> 6; - amdgpu_crtc->lut_b[regno] = blue >> 6; -} - -/** Gets the color ramps on behalf of fbcon */ -static void amdgpu_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green, - u16 *blue, int regno) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - - *red = amdgpu_crtc->lut_r[regno] << 6; - *green = amdgpu_crtc->lut_g[regno] << 6; - *blue = amdgpu_crtc->lut_b[regno] << 6; -} - static const struct drm_fb_helper_funcs amdgpu_fb_helper_funcs = { - .gamma_set = amdgpu_crtc_fb_gamma_set, - .gamma_get = amdgpu_crtc_fb_gamma_get, .fb_probe = amdgpufb_create, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index a57abc1a25fb5fbe6da96410316cc85342363b44..94c1e2e8e34ca659717af06aa944c8e2675ba2a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -55,6 +55,19 @@ /* * Common GART table functions. */ + +/** + * amdgpu_gart_set_defaults - set the default gart_size + * + * @adev: amdgpu_device pointer + * + * Set the default gart_size based on parameters and available VRAM. + */ +void amdgpu_gart_set_defaults(struct amdgpu_device *adev) +{ + adev->mc.gart_size = (uint64_t)amdgpu_gart_size << 20; +} + /** * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table * @@ -131,7 +144,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &adev->gart.robj); + NULL, NULL, 0, &adev->gart.robj); if (r) { return r; } @@ -262,6 +275,41 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, return 0; } +/** + * amdgpu_gart_map - map dma_addresses into GART entries + * + * @adev: amdgpu_device pointer + * @offset: offset into the GPU's gart aperture + * @pages: number of pages to bind + * @dma_addr: DMA addresses of pages + * + * Map the dma_addresses into GART entries (all asics). + * Returns 0 for success, -EINVAL for failure. + */ +int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, + int pages, dma_addr_t *dma_addr, uint64_t flags, + void *dst) +{ + uint64_t page_base; + unsigned i, j, t; + + if (!adev->gart.ready) { + WARN(1, "trying to bind memory to uninitialized GART !\n"); + return -EINVAL; + } + + t = offset / AMDGPU_GPU_PAGE_SIZE; + + for (i = 0; i < pages; i++) { + page_base = dma_addr[i]; + for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { + amdgpu_gart_set_pte_pde(adev, dst, t, page_base, flags); + page_base += AMDGPU_GPU_PAGE_SIZE; + } + } + return 0; +} + /** * amdgpu_gart_bind - bind pages into the gart page table * @@ -279,31 +327,30 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, dma_addr_t *dma_addr, uint64_t flags) { - unsigned t; - unsigned p; - uint64_t page_base; - int i, j; +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS + unsigned i,t,p; +#endif + int r; if (!adev->gart.ready) { WARN(1, "trying to bind memory to uninitialized GART !\n"); return -EINVAL; } +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS t = offset / AMDGPU_GPU_PAGE_SIZE; p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); - - for (i = 0; i < pages; i++, p++) { -#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS + for (i = 0; i < pages; i++, p++) adev->gart.pages[p] = pagelist[i]; #endif - if (adev->gart.ptr) { - page_base = dma_addr[i]; - for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { - amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, t, page_base, flags); - page_base += AMDGPU_GPU_PAGE_SIZE; - } - } + + if (adev->gart.ptr) { + r = amdgpu_gart_map(adev, offset, pages, dma_addr, flags, + adev->gart.ptr); + if (r) + return r; } + mb(); amdgpu_gart_flush_gpu_tlb(adev, 0); return 0; @@ -333,8 +380,8 @@ int amdgpu_gart_init(struct amdgpu_device *adev) if (r) return r; /* Compute table size */ - adev->gart.num_cpu_pages = adev->mc.gtt_size / PAGE_SIZE; - adev->gart.num_gpu_pages = adev->mc.gtt_size / AMDGPU_GPU_PAGE_SIZE; + adev->gart.num_cpu_pages = adev->mc.gart_size / PAGE_SIZE; + adev->gart.num_gpu_pages = adev->mc.gart_size / AMDGPU_GPU_PAGE_SIZE; DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", adev->gart.num_cpu_pages, adev->gart.num_gpu_pages); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h new file mode 100644 index 0000000000000000000000000000000000000000..d4cce69362003241455a6d60bc57c3d648700fac --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -0,0 +1,77 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_GART_H__ +#define __AMDGPU_GART_H__ + +#include + +/* + * GART structures, functions & helpers + */ +struct amdgpu_device; +struct amdgpu_bo; +struct amdgpu_gart_funcs; + +#define AMDGPU_GPU_PAGE_SIZE 4096 +#define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1) +#define AMDGPU_GPU_PAGE_SHIFT 12 +#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK) + +struct amdgpu_gart { + dma_addr_t table_addr; + struct amdgpu_bo *robj; + void *ptr; + unsigned num_gpu_pages; + unsigned num_cpu_pages; + unsigned table_size; +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS + struct page **pages; +#endif + bool ready; + + /* Asic default pte flags */ + uint64_t gart_pte_flags; + + const struct amdgpu_gart_funcs *gart_funcs; +}; + +void amdgpu_gart_set_defaults(struct amdgpu_device *adev); +int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); +void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); +int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); +void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); +int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); +void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); +int amdgpu_gart_init(struct amdgpu_device *adev); +void amdgpu_gart_fini(struct amdgpu_device *adev); +int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, + int pages); +int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, + int pages, dma_addr_t *dma_addr, uint64_t flags, + void *dst); +int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, + int pages, struct page **pagelist, + dma_addr_t *dma_addr, uint64_t flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 621f739103a6a30c9cf38801343ff5397a928d63..7171968f261e1a13094f3c94b2d649a382b2f6a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -49,7 +49,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, struct drm_gem_object **obj) { struct amdgpu_bo *robj; - unsigned long max_size; int r; *obj = NULL; @@ -58,20 +57,9 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, alignment = PAGE_SIZE; } - if (!(initial_domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA))) { - /* Maximum bo size is the unpinned gtt size since we use the gtt to - * handle vram to system pool migrations. - */ - max_size = adev->mc.gtt_size - adev->gart_pin_size; - if (size > max_size) { - DRM_DEBUG("Allocation size %ldMb bigger than %ldMb limit\n", - size >> 20, max_size >> 20); - return -ENOMEM; - } - } retry: r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, - flags, NULL, NULL, &robj); + flags, NULL, NULL, 0, &robj); if (r) { if (r != -ERESTARTSYS) { if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { @@ -103,7 +91,7 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev) spin_lock(&file->table_lock); idr_for_each_entry(&file->object_idr, gobj, handle) { WARN_ONCE(1, "And also active allocations!\n"); - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); } idr_destroy(&file->object_idr); spin_unlock(&file->table_lock); @@ -237,9 +225,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, if (args->in.domain_flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_VRAM_CLEARED| - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) + AMDGPU_GEM_CREATE_VRAM_CLEARED)) return -EINVAL; /* reject invalid gem domains */ @@ -275,7 +261,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (r) return r; @@ -318,7 +304,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, return r; bo = gem_to_amdgpu_bo(gobj); - bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags); if (r) @@ -353,7 +339,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (r) return r; @@ -367,7 +353,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, up_read(¤t->mm->mmap_sem); release_object: - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return r; } @@ -386,11 +372,11 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp, robj = gem_to_amdgpu_bo(gobj); if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) || (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) { - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return -EPERM; } *offset_p = amdgpu_bo_mmap_offset(robj); - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return 0; } @@ -460,7 +446,7 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, } else r = ret; - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return r; } @@ -503,7 +489,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, unreserve: amdgpu_bo_unreserve(robj); out: - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return r; } @@ -635,7 +621,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case AMDGPU_VA_OP_MAP: - r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, + r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, args->map_size); if (r) goto error_backoff; @@ -655,7 +641,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->map_size); break; case AMDGPU_VA_OP_REPLACE: - r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, + r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address, args->map_size); if (r) goto error_backoff; @@ -676,7 +662,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ttm_eu_backoff_reservation(&ticket, &list); error_unref: - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return r; } @@ -701,11 +687,11 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, switch (args->op) { case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: { struct drm_amdgpu_gem_create_in info; - void __user *out = (void __user *)(uintptr_t)args->value; + void __user *out = u64_to_user_ptr(args->value); info.bo_size = robj->gem_base.size; info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; - info.domains = robj->prefered_domains; + info.domains = robj->preferred_domains; info.domain_flags = robj->flags; amdgpu_bo_unreserve(robj); if (copy_to_user(out, &info, sizeof(info))) @@ -723,10 +709,10 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, amdgpu_bo_unreserve(robj); break; } - robj->prefered_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM | + robj->preferred_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_CPU); - robj->allowed_domains = robj->prefered_domains; + robj->allowed_domains = robj->preferred_domains; if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; @@ -738,7 +724,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, } out: - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); return r; } @@ -766,7 +752,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, r = drm_gem_handle_create(file_priv, gobj, &handle); /* drop reference from allocate - handle holds it now */ - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (r) { return r; } @@ -784,6 +770,7 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) unsigned domain; const char *placement; unsigned pin_count; + uint64_t offset; domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); switch (domain) { @@ -798,9 +785,12 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) placement = " CPU"; break; } - seq_printf(m, "\t0x%08x: %12ld byte %s @ 0x%010Lx", - id, amdgpu_bo_size(bo), placement, - amdgpu_bo_gpu_offset(bo)); + seq_printf(m, "\t0x%08x: %12ld byte %s", + id, amdgpu_bo_size(bo), placement); + + offset = ACCESS_ONCE(bo->tbo.mem.start); + if (offset != AMDGPU_BO_INVALID_OFFSET) + seq_printf(m, " @ 0x%010Lx", offset); pin_count = ACCESS_ONCE(bo->pin_count); if (pin_count) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e26108aad3fe246bfd12d9e1417c91a353cef5e3..4f6c68fc1dd91a43813a2782bbc9cf3dbbf43ded 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -125,7 +125,8 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) if (mec >= adev->gfx.mec.num_mec) break; - if (adev->gfx.mec.num_mec > 1) { + /* FIXME: spreading the queues across pipes causes perf regressions */ + if (0) { /* policy: amdgpu owns the first two queues of the first MEC */ if (mec == 0 && queue < 2) set_bit(i, adev->gfx.mec.queue_bitmap); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index f7d22c44034d43cce77ecd096c953e18aadc955d..9e05e257729f2e20dd91c4e81f6299e0800cc0e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -28,7 +28,7 @@ struct amdgpu_gtt_mgr { struct drm_mm mm; spinlock_t lock; - uint64_t available; + atomic64_t available; }; /** @@ -42,15 +42,19 @@ struct amdgpu_gtt_mgr { static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, unsigned long p_size) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_gtt_mgr *mgr; + uint64_t start, size; mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); if (!mgr) return -ENOMEM; - drm_mm_init(&mgr->mm, 0, p_size); + start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; + size = (adev->mc.gart_size >> PAGE_SHIFT) - start; + drm_mm_init(&mgr->mm, start, size); spin_lock_init(&mgr->lock); - mgr->available = p_size; + atomic64_set(&mgr->available, p_size); man->priv = mgr; return 0; } @@ -80,6 +84,20 @@ static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man) return 0; } +/** + * amdgpu_gtt_mgr_is_allocated - Check if mem has address space + * + * @mem: the mem object to check + * + * Check if a mem object has already address space allocated. + */ +bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem) +{ + struct drm_mm_node *node = mem->mm_node; + + return (node->start != AMDGPU_BO_INVALID_OFFSET); +} + /** * amdgpu_gtt_mgr_alloc - allocate new ranges * @@ -95,13 +113,14 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, const struct ttm_place *place, struct ttm_mem_reg *mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_gtt_mgr *mgr = man->priv; struct drm_mm_node *node = mem->mm_node; enum drm_mm_insert_mode mode; unsigned long fpfn, lpfn; int r; - if (node->start != AMDGPU_BO_INVALID_OFFSET) + if (amdgpu_gtt_mgr_is_allocated(mem)) return 0; if (place) @@ -112,7 +131,7 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, if (place && place->lpfn) lpfn = place->lpfn; else - lpfn = man->size; + lpfn = adev->gart.num_cpu_pages; mode = DRM_MM_INSERT_BEST; if (place && place->flags & TTM_PL_FLAG_TOPDOWN) @@ -134,15 +153,6 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, return r; } -void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager *man) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); - struct amdgpu_gtt_mgr *mgr = man->priv; - - seq_printf(m, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n", - man->size, mgr->available, (u64)atomic64_read(&adev->gtt_usage) >> 20); - -} /** * amdgpu_gtt_mgr_new - allocate a new node * @@ -163,11 +173,11 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, int r; spin_lock(&mgr->lock); - if (mgr->available < mem->num_pages) { + if (atomic64_read(&mgr->available) < mem->num_pages) { spin_unlock(&mgr->lock); return 0; } - mgr->available -= mem->num_pages; + atomic64_sub(mem->num_pages, &mgr->available); spin_unlock(&mgr->lock); node = kzalloc(sizeof(*node), GFP_KERNEL); @@ -194,9 +204,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, return 0; err_out: - spin_lock(&mgr->lock); - mgr->available += mem->num_pages; - spin_unlock(&mgr->lock); + atomic64_add(mem->num_pages, &mgr->available); return r; } @@ -223,30 +231,47 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man, spin_lock(&mgr->lock); if (node->start != AMDGPU_BO_INVALID_OFFSET) drm_mm_remove_node(node); - mgr->available += mem->num_pages; spin_unlock(&mgr->lock); + atomic64_add(mem->num_pages, &mgr->available); kfree(node); mem->mm_node = NULL; } +/** + * amdgpu_gtt_mgr_usage - return usage of GTT domain + * + * @man: TTM memory type manager + * + * Return how many bytes are used in the GTT domain + */ +uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man) +{ + struct amdgpu_gtt_mgr *mgr = man->priv; + + return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE; +} + /** * amdgpu_gtt_mgr_debug - dump VRAM table * * @man: TTM memory type manager - * @prefix: text prefix + * @printer: DRM printer to use * * Dump the table content using printk. */ static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer *printer) { struct amdgpu_gtt_mgr *mgr = man->priv; - struct drm_printer p = drm_debug_printer(prefix); spin_lock(&mgr->lock); - drm_mm_print(&mgr->mm, &p); + drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); + + drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n", + man->size, (u64)atomic64_read(&mgr->available), + amdgpu_gtt_mgr_usage(man) >> 20); } const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index f774b3f497d28735829d9b6407cd1832e9174945..659997bfff303b789f9f5fa6ae8ec17b0a02ae5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -130,6 +130,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, unsigned i; int r = 0; + bool need_pipe_sync = false; if (num_ibs == 0) return -EINVAL; @@ -165,15 +166,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (ring->funcs->emit_pipeline_sync && job && ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || amdgpu_vm_need_pipeline_sync(ring, job))) { - amdgpu_ring_emit_pipeline_sync(ring); + need_pipe_sync = true; dma_fence_put(tmp); } if (ring->funcs->insert_start) ring->funcs->insert_start(ring); - if (vm) { - r = amdgpu_vm_flush(ring, job); + if (job) { + r = amdgpu_vm_flush(ring, job, need_pipe_sync); if (r) { amdgpu_ring_undo(ring); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 62da6c5c6095a03ca0bc0f2c93f783212abd1e94..4bdd851f56d081310614f27dce5093255ecf7dfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -220,6 +220,10 @@ int amdgpu_irq_init(struct amdgpu_device *adev) int r = 0; spin_lock_init(&adev->irq.lock); + + /* Disable vblank irqs aggressively for power-saving */ + adev->ddev->vblank_disable_immediate = true; + r = drm_vblank_init(adev->ddev, adev->mode_info.num_crtc); if (r) { return r; @@ -263,7 +267,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) { unsigned i, j; - drm_vblank_cleanup(adev->ddev); if (adev->irq.installed) { drm_irq_uninstall(adev->ddev); adev->irq.installed = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 3d641e10e6b65c728ddda00be6027d7fe42cc461..4510627ae83e9b57e19dccfe19f260da00f918f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -81,6 +81,8 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]); if (r) kfree(*job); + else + (*job)->vm_pd_addr = adev->gart.table_addr; return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b0b23101d1c870ddeefc89b15818cd5b13ea13a7..e16229000a983e8cc068a70af7ec32074be06c30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -158,7 +158,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } - amdgpu_amdkfd_load_interface(adev); amdgpu_amdkfd_device_probe(adev); amdgpu_amdkfd_device_init(adev); @@ -456,13 +455,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: - ui64 = atomic64_read(&adev->vram_usage); + ui64 = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VIS_VRAM_USAGE: - ui64 = atomic64_read(&adev->vram_vis_usage); + ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_GTT_USAGE: - ui64 = atomic64_read(&adev->gtt_usage); + ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_GDS_CONFIG: { struct drm_amdgpu_info_gds gds_info; @@ -485,7 +484,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file vram_gtt.vram_size -= adev->vram_pin_size; vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size; vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size); - vram_gtt.gtt_size = adev->mc.gtt_size; + vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size; + vram_gtt.gtt_size *= PAGE_SIZE; vram_gtt.gtt_size -= adev->gart_pin_size; return copy_to_user(out, &vram_gtt, min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0; @@ -497,7 +497,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file mem.vram.total_heap_size = adev->mc.real_vram_size; mem.vram.usable_heap_size = adev->mc.real_vram_size - adev->vram_pin_size; - mem.vram.heap_usage = atomic64_read(&adev->vram_usage); + mem.vram.heap_usage = + amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = @@ -506,14 +507,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file adev->mc.visible_vram_size - (adev->vram_pin_size - adev->invisible_pin_size); mem.cpu_accessible_vram.heap_usage = - atomic64_read(&adev->vram_vis_usage); + amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.cpu_accessible_vram.max_allocation = mem.cpu_accessible_vram.usable_heap_size * 3 / 4; - mem.gtt.total_heap_size = adev->mc.gtt_size; - mem.gtt.usable_heap_size = - adev->mc.gtt_size - adev->gart_pin_size; - mem.gtt.heap_usage = atomic64_read(&adev->gtt_usage); + mem.gtt.total_heap_size = adev->mman.bdev.man[TTM_PL_TT].size; + mem.gtt.total_heap_size *= PAGE_SIZE; + mem.gtt.usable_heap_size = mem.gtt.total_heap_size + - adev->gart_pin_size; + mem.gtt.heap_usage = + amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4; return copy_to_user(out, &mem, @@ -571,8 +574,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10; } else { - dev_info.max_engine_clock = adev->pm.default_sclk * 10; - dev_info.max_memory_clock = adev->pm.default_mclk * 10; + dev_info.max_engine_clock = adev->clock.default_sclk * 10; + dev_info.max_memory_clock = adev->clock.default_mclk * 10; } dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se * @@ -587,10 +590,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); - dev_info.pte_fragment_size = (1 << AMDGPU_LOG2_PAGES_PER_FRAG) * - AMDGPU_GPU_PAGE_SIZE; + dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; - dev_info.cu_active_number = adev->gfx.cu_info.number; dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; dev_info.ce_ram_size = adev->gfx.ce_ram_size; @@ -839,7 +840,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) } if (amdgpu_sriov_vf(adev)) { - r = amdgpu_map_static_csa(adev, &fpriv->vm); + r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va); if (r) goto out_suspend; } @@ -892,8 +893,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, if (amdgpu_sriov_vf(adev)) { /* TODO: how to handle reserve failure */ BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); - amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va); - fpriv->vm.csa_bo_va = NULL; + amdgpu_vm_bo_rmv(adev, fpriv->csa_va); + fpriv->csa_va = NULL; amdgpu_bo_unreserve(adev->virt.csa_obj); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 6558a3ed57a7f6a1127f81aa0e1b5a861aa21b8d..e1cde6b80027fe1cad56b6169c38aadac1974e3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -146,36 +146,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, } } -/** - * amdgpu_mn_invalidate_page - callback to notify about mm change - * - * @mn: our notifier - * @mn: the mm this callback is about - * @address: address of invalidate page - * - * Invalidation of a single page. Blocks for all BOs mapping it - * and unmap them by move them into system domain again. - */ -static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address) -{ - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); - struct interval_tree_node *it; - - mutex_lock(&rmn->lock); - - it = interval_tree_iter_first(&rmn->objects, address, address); - if (it) { - struct amdgpu_mn_node *node; - - node = container_of(it, struct amdgpu_mn_node, it); - amdgpu_mn_invalidate_node(node, address, address); - } - - mutex_unlock(&rmn->lock); -} - /** * amdgpu_mn_invalidate_range_start - callback to notify about mm change * @@ -215,7 +185,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, static const struct mmu_notifier_ops amdgpu_mn_ops = { .release = amdgpu_mn_release, - .invalidate_page = amdgpu_mn_invalidate_page, .invalidate_range_start = amdgpu_mn_invalidate_range_start, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 43a9d3aec6c426e20bbff9bbb30084fe3cc523b2..2af2678ddaf6254948d529f647f87bb1ef74a959 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -257,15 +257,7 @@ struct amdgpu_audio { int num_pins; }; -struct amdgpu_mode_mc_save { - u32 vga_render_control; - u32 vga_hdp_control; - bool crtc_enabled[AMDGPU_MAX_CRTCS]; -}; - struct amdgpu_display_funcs { - /* vga render */ - void (*set_vga_render_state)(struct amdgpu_device *adev, bool render); /* display watermarks */ void (*bandwidth_update)(struct amdgpu_device *adev); /* get frame count */ @@ -300,10 +292,6 @@ struct amdgpu_display_funcs { uint16_t connector_object_id, struct amdgpu_hpd *hpd, struct amdgpu_router *router); - void (*stop_mc_access)(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save); - void (*resume_mc_access)(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save); }; struct amdgpu_mode_info { @@ -369,7 +357,6 @@ struct amdgpu_atom_ss { struct amdgpu_crtc { struct drm_crtc base; int crtc_id; - u16 lut_r[256], lut_g[256], lut_b[256]; bool enabled; bool can_tile; uint32_t crtc_offset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 8ee69652be8ceea145129cefb46ab46920744a74..e7e899190befb9f74edd45a82eeefbed7529fde5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -37,55 +37,6 @@ #include "amdgpu.h" #include "amdgpu_trace.h" - - -static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev, - struct ttm_mem_reg *mem) -{ - if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size) - return 0; - - return ((mem->start << PAGE_SHIFT) + mem->size) > - adev->mc.visible_vram_size ? - adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) : - mem->size; -} - -static void amdgpu_update_memory_usage(struct amdgpu_device *adev, - struct ttm_mem_reg *old_mem, - struct ttm_mem_reg *new_mem) -{ - u64 vis_size; - if (!adev) - return; - - if (new_mem) { - switch (new_mem->mem_type) { - case TTM_PL_TT: - atomic64_add(new_mem->size, &adev->gtt_usage); - break; - case TTM_PL_VRAM: - atomic64_add(new_mem->size, &adev->vram_usage); - vis_size = amdgpu_get_vis_part_size(adev, new_mem); - atomic64_add(vis_size, &adev->vram_vis_usage); - break; - } - } - - if (old_mem) { - switch (old_mem->mem_type) { - case TTM_PL_TT: - atomic64_sub(old_mem->size, &adev->gtt_usage); - break; - case TTM_PL_VRAM: - atomic64_sub(old_mem->size, &adev->vram_usage); - vis_size = amdgpu_get_vis_part_size(adev, old_mem); - atomic64_sub(vis_size, &adev->vram_vis_usage); - break; - } - } -} - static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); @@ -93,7 +44,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct amdgpu_bo, tbo); - amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL); + amdgpu_bo_kunmap(bo); drm_gem_object_release(&bo->gem_base); amdgpu_bo_unref(&bo->parent); @@ -219,7 +170,7 @@ static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo, } /** - * amdgpu_bo_create_kernel - create BO for kernel use + * amdgpu_bo_create_reserved - create reserved BO for kernel use * * @adev: amdgpu device object * @size: size for the new BO @@ -229,24 +180,30 @@ static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo, * @gpu_addr: GPU addr of the pinned BO * @cpu_addr: optional CPU address mapping * - * Allocates and pins a BO for kernel internal use. + * Allocates and pins a BO for kernel internal use, and returns it still + * reserved. * * Returns 0 on success, negative error code otherwise. */ -int amdgpu_bo_create_kernel(struct amdgpu_device *adev, - unsigned long size, int align, - u32 domain, struct amdgpu_bo **bo_ptr, - u64 *gpu_addr, void **cpu_addr) +int amdgpu_bo_create_reserved(struct amdgpu_device *adev, + unsigned long size, int align, + u32 domain, struct amdgpu_bo **bo_ptr, + u64 *gpu_addr, void **cpu_addr) { + bool free = false; int r; - r = amdgpu_bo_create(adev, size, align, true, domain, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, bo_ptr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r); - return r; + if (!*bo_ptr) { + r = amdgpu_bo_create(adev, size, align, true, domain, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, 0, bo_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", + r); + return r; + } + free = true; } r = amdgpu_bo_reserve(*bo_ptr, false); @@ -269,19 +226,51 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, } } - amdgpu_bo_unreserve(*bo_ptr); - return 0; error_unreserve: amdgpu_bo_unreserve(*bo_ptr); error_free: - amdgpu_bo_unref(bo_ptr); + if (free) + amdgpu_bo_unref(bo_ptr); return r; } +/** + * amdgpu_bo_create_kernel - create BO for kernel use + * + * @adev: amdgpu device object + * @size: size for the new BO + * @align: alignment for the new BO + * @domain: where to place it + * @bo_ptr: resulting BO + * @gpu_addr: GPU addr of the pinned BO + * @cpu_addr: optional CPU address mapping + * + * Allocates and pins a BO for kernel internal use. + * + * Returns 0 on success, negative error code otherwise. + */ +int amdgpu_bo_create_kernel(struct amdgpu_device *adev, + unsigned long size, int align, + u32 domain, struct amdgpu_bo **bo_ptr, + u64 *gpu_addr, void **cpu_addr) +{ + int r; + + r = amdgpu_bo_create_reserved(adev, size, align, domain, bo_ptr, + gpu_addr, cpu_addr); + + if (r) + return r; + + amdgpu_bo_unreserve(*bo_ptr); + + return 0; +} + /** * amdgpu_bo_free_kernel - free BO for kernel use * @@ -317,12 +306,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, struct sg_table *sg, struct ttm_placement *placement, struct reservation_object *resv, + uint64_t init_value, struct amdgpu_bo **bo_ptr) { struct amdgpu_bo *bo; enum ttm_bo_type type; unsigned long page_align; - u64 initial_bytes_moved; + u64 initial_bytes_moved, bytes_moved; size_t acc_size; int r; @@ -351,13 +341,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, } INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->va); - bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | + bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_CPU | AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA); - bo->allowed_domains = bo->prefered_domains; + bo->allowed_domains = bo->preferred_domains; if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; @@ -398,8 +388,14 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, !kernel, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); - amdgpu_cs_report_moved_bytes(adev, - atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved); + bytes_moved = atomic64_read(&adev->num_bytes_moved) - + initial_bytes_moved; + if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) + amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved); + else + amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); if (unlikely(r != 0)) return r; @@ -411,7 +407,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); + r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence); if (unlikely(r)) goto fail_unreserve; @@ -426,6 +422,10 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, trace_amdgpu_bo_create(bo); + /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */ + if (type == ttm_bo_type_device) + bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + return 0; fail_unreserve: @@ -459,6 +459,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, &placement, bo->tbo.resv, + 0, &bo->shadow); if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); @@ -470,11 +471,15 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, return r; } +/* init_value will only take effect when flags contains + * AMDGPU_GEM_CREATE_VRAM_CLEARED. + */ int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, struct sg_table *sg, struct reservation_object *resv, + uint64_t init_value, struct amdgpu_bo **bo_ptr) { struct ttm_placement placement = {0}; @@ -489,7 +494,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, domain, flags, sg, &placement, - resv, bo_ptr); + resv, init_value, bo_ptr); if (r) return r; @@ -535,7 +540,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr, amdgpu_bo_size(bo), resv, fence, - direct); + direct, false); if (!r) amdgpu_bo_fence(bo, *fence, true); @@ -551,7 +556,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo) if (bo->pin_count) return 0; - domain = bo->prefered_domains; + domain = bo->preferred_domains; retry: amdgpu_ttm_placement_from_domain(bo, domain); @@ -588,7 +593,7 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr, amdgpu_bo_size(bo), resv, fence, - direct); + direct, false); if (!r) amdgpu_bo_fence(bo, *fence, true); @@ -598,16 +603,16 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) { - bool is_iomem; + void *kptr; long r; if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) return -EPERM; - if (bo->kptr) { - if (ptr) { - *ptr = bo->kptr; - } + kptr = amdgpu_bo_kptr(bo); + if (kptr) { + if (ptr) + *ptr = kptr; return 0; } @@ -620,19 +625,23 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) if (r) return r; - bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); if (ptr) - *ptr = bo->kptr; + *ptr = amdgpu_bo_kptr(bo); return 0; } +void *amdgpu_bo_kptr(struct amdgpu_bo *bo) +{ + bool is_iomem; + + return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); +} + void amdgpu_bo_kunmap(struct amdgpu_bo *bo) { - if (bo->kptr == NULL) - return; - bo->kptr = NULL; - ttm_bo_kunmap(&bo->kmap); + if (bo->kmap.bo) + ttm_bo_kunmap(&bo->kmap); } struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo) @@ -724,15 +733,16 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, dev_err(adev->dev, "%p pin failed\n", bo); goto error; } - r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); - if (unlikely(r)) { - dev_err(adev->dev, "%p bind failed\n", bo); - goto error; - } bo->pin_count = 1; - if (gpu_addr != NULL) + if (gpu_addr != NULL) { + r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); + if (unlikely(r)) { + dev_err(adev->dev, "%p bind failed\n", bo); + goto error; + } *gpu_addr = amdgpu_bo_gpu_offset(bo); + } if (domain == AMDGPU_GEM_DOMAIN_VRAM) { adev->vram_pin_size += amdgpu_bo_size(bo); if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) @@ -921,6 +931,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, abo = container_of(bo, struct amdgpu_bo, tbo); amdgpu_vm_bo_invalidate(adev, abo); + amdgpu_bo_kunmap(abo); + /* remember the eviction */ if (evict) atomic64_inc(&adev->num_evictions); @@ -930,8 +942,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, return; /* move_notify is called before move happens */ - amdgpu_update_memory_usage(adev, &bo->mem, new_mem); - trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type); } @@ -939,19 +949,22 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo; - unsigned long offset, size, lpfn; - int i, r; + unsigned long offset, size; + int r; if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) return 0; abo = container_of(bo, struct amdgpu_bo, tbo); + + /* Remember that this BO was accessed by the CPU */ + abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + if (bo->mem.mem_type != TTM_PL_VRAM) return 0; size = bo->mem.num_pages << PAGE_SHIFT; offset = bo->mem.start << PAGE_SHIFT; - /* TODO: figure out how to map scattered VRAM to the CPU */ if ((offset + size) <= adev->mc.visible_vram_size) return 0; @@ -961,26 +974,21 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* hurrah the memory is not visible ! */ atomic64_inc(&adev->num_vram_cpu_page_faults); - amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM); - lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; - for (i = 0; i < abo->placement.num_placement; i++) { - /* Force into visible VRAM */ - if ((abo->placements[i].flags & TTM_PL_FLAG_VRAM) && - (!abo->placements[i].lpfn || - abo->placements[i].lpfn > lpfn)) - abo->placements[i].lpfn = lpfn; - } + amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT); + + /* Avoid costly evictions; only set GTT as a busy placement */ + abo->placement.num_busy_placement = 1; + abo->placement.busy_placement = &abo->placements[1]; + r = ttm_bo_validate(bo, &abo->placement, false, false); - if (unlikely(r == -ENOMEM)) { - amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); - return ttm_bo_validate(bo, &abo->placement, false, false); - } else if (unlikely(r != 0)) { + if (unlikely(r != 0)) return r; - } offset = bo->mem.start << PAGE_SHIFT; /* this should never happen */ - if ((offset + size) > adev->mc.visible_vram_size) + if (bo->mem.mem_type == TTM_PL_VRAM && + (offset + size) > adev->mc.visible_vram_size) return -EINVAL; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 382485115b0641b059602d21fa92be33281ac8de..a288fa6d72c8026f60a62625f4bf421cc6d3a74e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -33,6 +33,61 @@ #define AMDGPU_BO_INVALID_OFFSET LONG_MAX +/* bo virtual addresses in a vm */ +struct amdgpu_bo_va_mapping { + struct list_head list; + struct rb_node rb; + uint64_t start; + uint64_t last; + uint64_t __subtree_last; + uint64_t offset; + uint64_t flags; +}; + +/* User space allocated BO in a VM */ +struct amdgpu_bo_va { + struct amdgpu_vm_bo_base base; + + /* protected by bo being reserved */ + struct dma_fence *last_pt_update; + unsigned ref_count; + + /* mappings for this bo_va */ + struct list_head invalids; + struct list_head valids; +}; + +struct amdgpu_bo { + /* Protected by tbo.reserved */ + u32 preferred_domains; + u32 allowed_domains; + struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; + struct ttm_placement placement; + struct ttm_buffer_object tbo; + struct ttm_bo_kmap_obj kmap; + u64 flags; + unsigned pin_count; + u64 tiling_flags; + u64 metadata_flags; + void *metadata; + u32 metadata_size; + unsigned prime_shared_count; + /* list of all virtual address to which this bo is associated to */ + struct list_head va; + /* Constant after initialization */ + struct drm_gem_object gem_base; + struct amdgpu_bo *parent; + struct amdgpu_bo *shadow; + + struct ttm_bo_kmap_obj dma_buf_vmap; + struct amdgpu_mn *mn; + + union { + struct list_head mn_list; + struct list_head shadow_list; + }; +}; + /** * amdgpu_mem_type_to_domain - return domain corresponding to mem_type * @mem_type: ttm memory type @@ -120,7 +175,11 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) */ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) { - return bo->tbo.mem.mem_type != TTM_PL_SYSTEM; + switch (bo->tbo.mem.mem_type) { + case TTM_PL_TT: return amdgpu_ttm_is_bound(bo->tbo.ttm); + case TTM_PL_VRAM: return true; + default: return false; + } } int amdgpu_bo_create(struct amdgpu_device *adev, @@ -128,6 +187,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bool kernel, u32 domain, u64 flags, struct sg_table *sg, struct reservation_object *resv, + uint64_t init_value, struct amdgpu_bo **bo_ptr); int amdgpu_bo_create_restricted(struct amdgpu_device *adev, unsigned long size, int byte_align, @@ -135,7 +195,12 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, struct sg_table *sg, struct ttm_placement *placement, struct reservation_object *resv, + uint64_t init_value, struct amdgpu_bo **bo_ptr); +int amdgpu_bo_create_reserved(struct amdgpu_device *adev, + unsigned long size, int align, + u32 domain, struct amdgpu_bo **bo_ptr, + u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_create_kernel(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, @@ -143,6 +208,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); +void *amdgpu_bo_kptr(struct amdgpu_bo *bo); void amdgpu_bo_kunmap(struct amdgpu_bo *bo); struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo); void amdgpu_bo_unref(struct amdgpu_bo **bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h index c19c4d138751df410be7f6a9c44fc462eced2dd4..f21a7716b90e67b7cdd184046a107153bc49d19a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h @@ -30,6 +30,7 @@ struct cg_flag_name const char *name; }; +void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev); int amdgpu_pm_sysfs_init(struct amdgpu_device *adev); void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev); void amdgpu_pm_print_power_states(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 6bdc866570ab8d67575ac3734d1f67573978df79..5b3f92891f899d8b6f00da5ade675245beba6571 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -69,7 +69,7 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, ww_mutex_lock(&resv->lock, NULL); ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false, - AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, &bo); + AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, 0, &bo); ww_mutex_unlock(&resv->lock); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 4083be61b328fb56f1832fa95496074e8c84cef7..8c2204c7b3847c3ce18042b48d70516155b843e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -63,8 +63,13 @@ static int psp_sw_init(void *handle) psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; break; case CHIP_RAVEN: +#if 0 + psp->init_microcode = psp_v10_0_init_microcode; +#endif psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf; psp->ring_init = psp_v10_0_ring_init; + psp->ring_create = psp_v10_0_ring_create; + psp->ring_destroy = psp_v10_0_ring_destroy; psp->cmd_submit = psp_v10_0_cmd_submit; psp->compare_sram_data = psp_v10_0_compare_sram_data; break; @@ -95,9 +100,8 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, int i; struct amdgpu_device *adev = psp->adev; - val = RREG32(reg_index); - for (i = 0; i < adev->usec_timeout; i++) { + val = RREG32(reg_index); if (check_changed) { if (val != reg_val) return 0; @@ -118,33 +122,18 @@ psp_cmd_submit_buf(struct psp_context *psp, int index) { int ret; - struct amdgpu_bo *cmd_buf_bo; - uint64_t cmd_buf_mc_addr; - struct psp_gfx_cmd_resp *cmd_buf_mem; - struct amdgpu_device *adev = psp->adev; - - ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &cmd_buf_bo, &cmd_buf_mc_addr, - (void **)&cmd_buf_mem); - if (ret) - return ret; - memset(cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); + memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); - memcpy(cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); + memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); - ret = psp_cmd_submit(psp, ucode, cmd_buf_mc_addr, + ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr, fence_mc_addr, index); while (*((unsigned int *)psp->fence_buf) != index) { msleep(1); } - amdgpu_bo_free_kernel(&cmd_buf_bo, - &cmd_buf_mc_addr, - (void **)&cmd_buf_mem); - return ret; } @@ -351,6 +340,13 @@ static int psp_load_fw(struct amdgpu_device *adev) &psp->fence_buf_bo, &psp->fence_buf_mc_addr, &psp->fence_buf); + if (ret) + goto failed_mem2; + + ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, + (void **)&psp->cmd_buf_mem); if (ret) goto failed_mem1; @@ -358,7 +354,7 @@ static int psp_load_fw(struct amdgpu_device *adev) ret = psp_ring_init(psp, PSP_RING_TYPE__KM); if (ret) - goto failed_mem1; + goto failed_mem; ret = psp_tmr_init(psp); if (ret) @@ -379,9 +375,13 @@ static int psp_load_fw(struct amdgpu_device *adev) return 0; failed_mem: + amdgpu_bo_free_kernel(&psp->cmd_buf_bo, + &psp->cmd_buf_mc_addr, + (void **)&psp->cmd_buf_mem); +failed_mem1: amdgpu_bo_free_kernel(&psp->fence_buf_bo, &psp->fence_buf_mc_addr, &psp->fence_buf); -failed_mem1: +failed_mem2: amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); failed: @@ -435,16 +435,15 @@ static int psp_hw_fini(void *handle) psp_ring_destroy(psp, PSP_RING_TYPE__KM); - if (psp->tmr_buf) - amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); - - if (psp->fw_pri_buf) - amdgpu_bo_free_kernel(&psp->fw_pri_bo, - &psp->fw_pri_mc_addr, &psp->fw_pri_buf); - - if (psp->fence_buf_bo) - amdgpu_bo_free_kernel(&psp->fence_buf_bo, - &psp->fence_buf_mc_addr, &psp->fence_buf); + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + amdgpu_bo_free_kernel(&psp->fw_pri_bo, + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); + amdgpu_bo_free_kernel(&psp->asd_shared_bo, &psp->asd_shared_mc_addr, + &psp->asd_shared_buf); + amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, + (void **)&psp->cmd_buf_mem); kfree(psp->cmd); psp->cmd = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 1a1c8b469f9385758c188c825597cf67b6121c2d..538fa9dbfb21200094df54f07ad598f77f323fc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -108,6 +108,11 @@ struct psp_context struct amdgpu_bo *fence_buf_bo; uint64_t fence_buf_mc_addr; void *fence_buf; + + /* cmd buffer */ + struct amdgpu_bo *cmd_buf_bo; + uint64_t cmd_buf_mc_addr; + struct psp_gfx_cmd_resp *cmd_buf_mem; }; struct amdgpu_psp_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 75165e07b1cd8807831d4dd602a9c1a2037d8ad6..6c5646b48d1a5fce145df441ce66098ea62e95d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -184,32 +184,16 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, return r; } - if (ring->funcs->support_64bit_ptrs) { - r = amdgpu_wb_get_64bit(adev, &ring->rptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); - return r; - } - - r = amdgpu_wb_get_64bit(adev, &ring->wptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); - return r; - } - - } else { - r = amdgpu_wb_get(adev, &ring->rptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); - return r; - } - - r = amdgpu_wb_get(adev, &ring->wptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); - return r; - } + r = amdgpu_wb_get(adev, &ring->rptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); + return r; + } + r = amdgpu_wb_get(adev, &ring->wptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); + return r; } r = amdgpu_wb_get(adev, &ring->fence_offs); @@ -277,18 +261,15 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) { ring->ready = false; - if (ring->funcs->support_64bit_ptrs) { - amdgpu_wb_free_64bit(ring->adev, ring->cond_exe_offs); - amdgpu_wb_free_64bit(ring->adev, ring->fence_offs); - amdgpu_wb_free_64bit(ring->adev, ring->rptr_offs); - amdgpu_wb_free_64bit(ring->adev, ring->wptr_offs); - } else { - amdgpu_wb_free(ring->adev, ring->cond_exe_offs); - amdgpu_wb_free(ring->adev, ring->fence_offs); - amdgpu_wb_free(ring->adev, ring->rptr_offs); - amdgpu_wb_free(ring->adev, ring->wptr_offs); - } + /* Not to finish a ring which is not initialized */ + if (!(ring->adev) || !(ring->adev->rings[ring->idx])) + return; + + amdgpu_wb_free(ring->adev, ring->rptr_offs); + amdgpu_wb_free(ring->adev, ring->wptr_offs); + amdgpu_wb_free(ring->adev, ring->cond_exe_offs); + amdgpu_wb_free(ring->adev, ring->fence_offs); amdgpu_bo_free_kernel(&ring->ring_obj, &ring->gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index bc8dec992f73d5da7a4d8742fd3d7bc110cd0312..322d25299a00cf364fba3b8ad4343ffa22b3efb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -212,4 +212,44 @@ static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) } +static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) +{ + if (ring->count_dw <= 0) + DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); + ring->ring[ring->wptr++ & ring->buf_mask] = v; + ring->wptr &= ring->ptr_mask; + ring->count_dw--; +} + +static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, + void *src, int count_dw) +{ + unsigned occupied, chunk1, chunk2; + void *dst; + + if (unlikely(ring->count_dw < count_dw)) + DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); + + occupied = ring->wptr & ring->buf_mask; + dst = (void *)&ring->ring[occupied]; + chunk1 = ring->buf_mask + 1 - occupied; + chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; + chunk2 = count_dw - chunk1; + chunk1 <<= 2; + chunk2 <<= 2; + + if (chunk1) + memcpy(dst, src, chunk1); + + if (chunk2) { + src += chunk1; + dst = (void *)ring->ring; + memcpy(dst, src, chunk2); + } + + ring->wptr += count_dw; + ring->wptr &= ring->ptr_mask; + ring->count_dw -= count_dw; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 5ca75a456ad2ad94e81773fc209e1977dc18aa9f..3144400435b79ca3c3e58bf18abba5462efe0bed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -64,7 +64,7 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&sa_manager->flist[i]); r = amdgpu_bo_create(adev, size, align, true, domain, - 0, NULL, NULL, &sa_manager->bo); + 0, NULL, NULL, 0, &sa_manager->bo); if (r) { dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 15510dadde018d87d455b371af32e8d349c3a6e1..ed8c3739015be481cfd7072b712b3c8ee562d2e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -33,7 +33,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_bo *vram_obj = NULL; struct amdgpu_bo **gtt_obj = NULL; - uint64_t gtt_addr, vram_addr; + uint64_t gart_addr, vram_addr; unsigned n, size; int i, r; @@ -42,7 +42,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) /* Number of tests = * (Total GTT - IB pool - writeback page - ring buffers) / test size */ - n = adev->mc.gtt_size - AMDGPU_IB_POOL_SIZE*64*1024; + n = adev->mc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) if (adev->rings[i]) n -= adev->rings[i]->ring_size; @@ -61,7 +61,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, 0, - NULL, NULL, &vram_obj); + NULL, NULL, 0, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; @@ -76,13 +76,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } for (i = 0; i < n; i++) { void *gtt_map, *vram_map; - void **gtt_start, **gtt_end; + void **gart_start, **gart_end; void **vram_start, **vram_end; struct dma_fence *fence = NULL; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - NULL, gtt_obj + i); + NULL, 0, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; @@ -91,7 +91,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) r = amdgpu_bo_reserve(gtt_obj[i], false); if (unlikely(r != 0)) goto out_lclean_unref; - r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, >t_addr); + r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gart_addr); if (r) { DRM_ERROR("Failed to pin GTT object %d\n", i); goto out_lclean_unres; @@ -103,15 +103,15 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_lclean_unpin; } - for (gtt_start = gtt_map, gtt_end = gtt_map + size; - gtt_start < gtt_end; - gtt_start++) - *gtt_start = gtt_start; + for (gart_start = gtt_map, gart_end = gtt_map + size; + gart_start < gart_end; + gart_start++) + *gart_start = gart_start; amdgpu_bo_kunmap(gtt_obj[i]); - r = amdgpu_copy_buffer(ring, gtt_addr, vram_addr, - size, NULL, &fence, false); + r = amdgpu_copy_buffer(ring, gart_addr, vram_addr, + size, NULL, &fence, false, false); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); @@ -132,21 +132,21 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_lclean_unpin; } - for (gtt_start = gtt_map, gtt_end = gtt_map + size, + for (gart_start = gtt_map, gart_end = gtt_map + size, vram_start = vram_map, vram_end = vram_map + size; vram_start < vram_end; - gtt_start++, vram_start++) { - if (*vram_start != gtt_start) { + gart_start++, vram_start++) { + if (*vram_start != gart_start) { DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, " "expected 0x%p (GTT/VRAM offset " "0x%16llx/0x%16llx)\n", - i, *vram_start, gtt_start, + i, *vram_start, gart_start, (unsigned long long) - (gtt_addr - adev->mc.gtt_start + - (void*)gtt_start - gtt_map), + (gart_addr - adev->mc.gart_start + + (void*)gart_start - gtt_map), (unsigned long long) (vram_addr - adev->mc.vram_start + - (void*)gtt_start - gtt_map)); + (void*)gart_start - gtt_map)); amdgpu_bo_kunmap(vram_obj); goto out_lclean_unpin; } @@ -155,8 +155,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) amdgpu_bo_kunmap(vram_obj); - r = amdgpu_copy_buffer(ring, vram_addr, gtt_addr, - size, NULL, &fence, false); + r = amdgpu_copy_buffer(ring, vram_addr, gart_addr, + size, NULL, &fence, false, false); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); @@ -177,20 +177,20 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_lclean_unpin; } - for (gtt_start = gtt_map, gtt_end = gtt_map + size, + for (gart_start = gtt_map, gart_end = gtt_map + size, vram_start = vram_map, vram_end = vram_map + size; - gtt_start < gtt_end; - gtt_start++, vram_start++) { - if (*gtt_start != vram_start) { + gart_start < gart_end; + gart_start++, vram_start++) { + if (*gart_start != vram_start) { DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, " "expected 0x%p (VRAM/GTT offset " "0x%16llx/0x%16llx)\n", - i, *gtt_start, vram_start, + i, *gart_start, vram_start, (unsigned long long) (vram_addr - adev->mc.vram_start + (void*)vram_start - vram_map), (unsigned long long) - (gtt_addr - adev->mc.gtt_start + + (gart_addr - adev->mc.gart_start + (void*)vram_start - vram_map)); amdgpu_bo_kunmap(gtt_obj[i]); goto out_lclean_unpin; @@ -200,7 +200,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) amdgpu_bo_kunmap(gtt_obj[i]); DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n", - gtt_addr - adev->mc.gtt_start); + gart_addr - adev->mc.gart_start); continue; out_lclean_unpin: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 8601904e670ae7cf094a341c19abb76404fb3ec6..1c88bd5e29adc161719a93bc07de4485c4fecce3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -14,6 +14,62 @@ #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished) +TRACE_EVENT(amdgpu_ttm_tt_populate, + TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address), + TP_ARGS(adev, dma_address, phys_address), + TP_STRUCT__entry( + __field(uint16_t, domain) + __field(uint8_t, bus) + __field(uint8_t, slot) + __field(uint8_t, func) + __field(uint64_t, dma) + __field(uint64_t, phys) + ), + TP_fast_assign( + __entry->domain = pci_domain_nr(adev->pdev->bus); + __entry->bus = adev->pdev->bus->number; + __entry->slot = PCI_SLOT(adev->pdev->devfn); + __entry->func = PCI_FUNC(adev->pdev->devfn); + __entry->dma = dma_address; + __entry->phys = phys_address; + ), + TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + +TRACE_EVENT(amdgpu_ttm_tt_unpopulate, + TP_PROTO(struct amdgpu_device *adev, uint64_t dma_address, uint64_t phys_address), + TP_ARGS(adev, dma_address, phys_address), + TP_STRUCT__entry( + __field(uint16_t, domain) + __field(uint8_t, bus) + __field(uint8_t, slot) + __field(uint8_t, func) + __field(uint64_t, dma) + __field(uint64_t, phys) + ), + TP_fast_assign( + __entry->domain = pci_domain_nr(adev->pdev->bus); + __entry->bus = adev->pdev->bus->number; + __entry->slot = PCI_SLOT(adev->pdev->devfn); + __entry->func = PCI_FUNC(adev->pdev->devfn); + __entry->dma = dma_address; + __entry->phys = phys_address; + ), + TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx", + (unsigned)__entry->domain, + (unsigned)__entry->bus, + (unsigned)__entry->slot, + (unsigned)__entry->func, + (unsigned long long)__entry->dma, + (unsigned long long)__entry->phys) +); + TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), @@ -105,12 +161,12 @@ TRACE_EVENT(amdgpu_bo_create, __entry->bo = bo; __entry->pages = bo->tbo.num_pages; __entry->type = bo->tbo.mem.mem_type; - __entry->prefer = bo->prefered_domains; + __entry->prefer = bo->preferred_domains; __entry->allow = bo->allowed_domains; __entry->visible = bo->flags; ), - TP_printk("bo=%p, pages=%u, type=%d, prefered=%d, allowed=%d, visible=%d", + TP_printk("bo=%p, pages=%u, type=%d, preferred=%d, allowed=%d, visible=%d", __entry->bo, __entry->pages, __entry->type, __entry->prefer, __entry->allow, __entry->visible) ); @@ -224,17 +280,17 @@ TRACE_EVENT(amdgpu_vm_bo_map, __field(long, start) __field(long, last) __field(u64, offset) - __field(u32, flags) + __field(u64, flags) ), TP_fast_assign( - __entry->bo = bo_va ? bo_va->bo : NULL; + __entry->bo = bo_va ? bo_va->base.bo : NULL; __entry->start = mapping->start; __entry->last = mapping->last; __entry->offset = mapping->offset; __entry->flags = mapping->flags; ), - TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x", + TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx", __entry->bo, __entry->start, __entry->last, __entry->offset, __entry->flags) ); @@ -248,17 +304,17 @@ TRACE_EVENT(amdgpu_vm_bo_unmap, __field(long, start) __field(long, last) __field(u64, offset) - __field(u32, flags) + __field(u64, flags) ), TP_fast_assign( - __entry->bo = bo_va->bo; + __entry->bo = bo_va->base.bo; __entry->start = mapping->start; __entry->last = mapping->last; __entry->offset = mapping->offset; __entry->flags = mapping->flags; ), - TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x", + TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%llx", __entry->bo, __entry->start, __entry->last, __entry->offset, __entry->flags) ); @@ -269,7 +325,7 @@ DECLARE_EVENT_CLASS(amdgpu_vm_mapping, TP_STRUCT__entry( __field(u64, soffset) __field(u64, eoffset) - __field(u32, flags) + __field(u64, flags) ), TP_fast_assign( @@ -277,7 +333,7 @@ DECLARE_EVENT_CLASS(amdgpu_vm_mapping, __entry->eoffset = mapping->last + 1; __entry->flags = mapping->flags; ), - TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x", + TP_printk("soffs=%010llx, eoffs=%010llx, flags=%llx", __entry->soffset, __entry->eoffset, __entry->flags) ); @@ -293,14 +349,14 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_mapping, TRACE_EVENT(amdgpu_vm_set_ptes, TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags), + uint32_t incr, uint64_t flags), TP_ARGS(pe, addr, count, incr, flags), TP_STRUCT__entry( __field(u64, pe) __field(u64, addr) __field(u32, count) __field(u32, incr) - __field(u32, flags) + __field(u64, flags) ), TP_fast_assign( @@ -310,7 +366,7 @@ TRACE_EVENT(amdgpu_vm_set_ptes, __entry->incr = incr; __entry->flags = flags; ), - TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%08x, count=%u", + TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u", __entry->pe, __entry->addr, __entry->incr, __entry->flags, __entry->count) ); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c9b131b13ef74de91a85afa562d2b2446c59a1bd..8b2c294f6f7999371a045aa985dd409ef12dd2d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,14 +43,20 @@ #include #include #include "amdgpu.h" +#include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) +static int amdgpu_map_buffer(struct ttm_buffer_object *bo, + struct ttm_mem_reg *mem, unsigned num_pages, + uint64_t offset, unsigned window, + struct amdgpu_ring *ring, + uint64_t *addr); + static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev); static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev); - /* * Global memory. */ @@ -97,6 +103,8 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) goto error_bo; } + mutex_init(&adev->mman.gtt_window_lock); + ring = adev->mman.buffer_funcs_ring; rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; r = amd_sched_entity_init(&ring->sched, &adev->mman.entity, @@ -123,6 +131,7 @@ static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) if (adev->mman.mem_global_referenced) { amd_sched_entity_fini(adev->mman.entity.sched, &adev->mman.entity); + mutex_destroy(&adev->mman.gtt_window_lock); drm_global_item_unref(&adev->mman.bo_global_ref.ref); drm_global_item_unref(&adev->mman.mem_global_ref); adev->mman.mem_global_referenced = false; @@ -150,7 +159,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, break; case TTM_PL_TT: man->func = &amdgpu_gtt_mgr_func; - man->gpu_offset = adev->mc.gtt_start; + man->gpu_offset = adev->mc.gart_start; man->available_caching = TTM_PL_MASK_CACHING; man->default_caching = TTM_PL_FLAG_CACHED; man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA; @@ -186,12 +195,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo; - static struct ttm_place placements = { + static const struct ttm_place placements = { .fpfn = 0, .lpfn = 0, .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM }; - unsigned i; if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { placement->placement = &placements; @@ -207,22 +215,36 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, adev->mman.buffer_funcs_ring && adev->mman.buffer_funcs_ring->ready == false) { amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); + } else if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { + unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; + struct drm_mm_node *node = bo->mem.mm_node; + unsigned long pages_left; + + for (pages_left = bo->mem.num_pages; + pages_left; + pages_left -= node->size, node++) { + if (node->start < fpfn) + break; + } + + if (!pages_left) + goto gtt; + + /* Try evicting to the CPU inaccessible part of VRAM + * first, but only set GTT as busy placement, so this + * BO will be evicted to GTT rather than causing other + * BOs to be evicted from VRAM + */ + amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT); + abo->placements[0].fpfn = fpfn; + abo->placements[0].lpfn = 0; + abo->placement.busy_placement = &abo->placements[1]; + abo->placement.num_busy_placement = 1; } else { +gtt: amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); - for (i = 0; i < abo->placement.num_placement; ++i) { - if (!(abo->placements[i].flags & - TTM_PL_FLAG_TT)) - continue; - - if (abo->placements[i].lpfn) - continue; - - /* set an upper limit to force directly - * allocating address space for the BO. - */ - abo->placements[i].lpfn = - adev->mc.gtt_size >> PAGE_SHIFT; - } } break; case TTM_PL_TT: @@ -252,29 +274,18 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo, new_mem->mm_node = NULL; } -static int amdgpu_mm_node_addr(struct ttm_buffer_object *bo, - struct drm_mm_node *mm_node, - struct ttm_mem_reg *mem, - uint64_t *addr) +static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, + struct drm_mm_node *mm_node, + struct ttm_mem_reg *mem) { - int r; + uint64_t addr = 0; - switch (mem->mem_type) { - case TTM_PL_TT: - r = amdgpu_ttm_bind(bo, mem); - if (r) - return r; - - case TTM_PL_VRAM: - *addr = mm_node->start << PAGE_SHIFT; - *addr += bo->bdev->man[mem->mem_type].gpu_offset; - break; - default: - DRM_ERROR("Unknown placement %d\n", mem->mem_type); - return -EINVAL; + if (mem->mem_type != TTM_PL_TT || + amdgpu_gtt_mgr_is_allocated(mem)) { + addr = mm_node->start << PAGE_SHIFT; + addr += bo->bdev->man[mem->mem_type].gpu_offset; } - - return 0; + return addr; } static int amdgpu_move_blit(struct ttm_buffer_object *bo, @@ -299,26 +310,40 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, } old_mm = old_mem->mm_node; - r = amdgpu_mm_node_addr(bo, old_mm, old_mem, &old_start); - if (r) - return r; old_size = old_mm->size; - + old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem); new_mm = new_mem->mm_node; - r = amdgpu_mm_node_addr(bo, new_mm, new_mem, &new_start); - if (r) - return r; new_size = new_mm->size; + new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem); num_pages = new_mem->num_pages; + mutex_lock(&adev->mman.gtt_window_lock); while (num_pages) { - unsigned long cur_pages = min(old_size, new_size); + unsigned long cur_pages = min(min(old_size, new_size), + (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE); + uint64_t from = old_start, to = new_start; struct dma_fence *next; - r = amdgpu_copy_buffer(ring, old_start, new_start, + if (old_mem->mem_type == TTM_PL_TT && + !amdgpu_gtt_mgr_is_allocated(old_mem)) { + r = amdgpu_map_buffer(bo, old_mem, cur_pages, + old_start, 0, ring, &from); + if (r) + goto error; + } + + if (new_mem->mem_type == TTM_PL_TT && + !amdgpu_gtt_mgr_is_allocated(new_mem)) { + r = amdgpu_map_buffer(bo, new_mem, cur_pages, + new_start, 1, ring, &to); + if (r) + goto error; + } + + r = amdgpu_copy_buffer(ring, from, to, cur_pages * PAGE_SIZE, - bo->resv, &next, false); + bo->resv, &next, false, true); if (r) goto error; @@ -331,10 +356,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, old_size -= cur_pages; if (!old_size) { - r = amdgpu_mm_node_addr(bo, ++old_mm, old_mem, - &old_start); - if (r) - goto error; + old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem); old_size = old_mm->size; } else { old_start += cur_pages * PAGE_SIZE; @@ -342,22 +364,21 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, new_size -= cur_pages; if (!new_size) { - r = amdgpu_mm_node_addr(bo, ++new_mm, new_mem, - &new_start); - if (r) - goto error; - + new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem); new_size = new_mm->size; } else { new_start += cur_pages * PAGE_SIZE; } } + mutex_unlock(&adev->mman.gtt_window_lock); r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); dma_fence_put(fence); return r; error: + mutex_unlock(&adev->mman.gtt_window_lock); + if (fence) dma_fence_wait(fence, false); dma_fence_put(fence); @@ -384,7 +405,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, placement.num_busy_placement = 1; placement.busy_placement = &placements; placements.fpfn = 0; - placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT; + placements.lpfn = 0; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_gpu); @@ -431,7 +452,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, placement.num_busy_placement = 1; placement.busy_placement = &placements; placements.fpfn = 0; - placements.lpfn = adev->mc.gtt_size >> PAGE_SHIFT; + placements.lpfn = 0; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_gpu); @@ -507,6 +528,15 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, } } + if (bo->type == ttm_bo_type_device && + new_mem->mem_type == TTM_PL_VRAM && + old_mem->mem_type != TTM_PL_VRAM) { + /* amdgpu_bo_fault_reserve_notify will re-set this if the CPU + * accesses the BO after it's moved. + */ + abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + } + /* update statistics */ atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved); return 0; @@ -633,6 +663,38 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) return r; } +static void amdgpu_trace_dma_map(struct ttm_tt *ttm) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); + struct amdgpu_ttm_tt *gtt = (void *)ttm; + unsigned i; + + if (unlikely(trace_amdgpu_ttm_tt_populate_enabled())) { + for (i = 0; i < ttm->num_pages; i++) { + trace_amdgpu_ttm_tt_populate( + adev, + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); + } + } +} + +static void amdgpu_trace_dma_unmap(struct ttm_tt *ttm) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); + struct amdgpu_ttm_tt *gtt = (void *)ttm; + unsigned i; + + if (unlikely(trace_amdgpu_ttm_tt_unpopulate_enabled())) { + for (i = 0; i < ttm->num_pages; i++) { + trace_amdgpu_ttm_tt_unpopulate( + adev, + gtt->ttm.dma_address[i], + page_to_phys(ttm->pages[i])); + } + } +} + /* prepare the sg table with the user pages */ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) { @@ -659,6 +721,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); + amdgpu_trace_dma_map(ttm); + return 0; release_sg: @@ -692,14 +756,41 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) put_page(page); } + amdgpu_trace_dma_unmap(ttm); + sg_free_table(ttm->sg); } +static int amdgpu_ttm_do_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + uint64_t flags; + int r; + + spin_lock(>t->adev->gtt_list_lock); + flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, mem); + gtt->offset = (u64)mem->start << PAGE_SHIFT; + r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, + ttm->pages, gtt->ttm.dma_address, flags); + + if (r) { + DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", + ttm->num_pages, gtt->offset); + goto error_gart_bind; + } + + list_add_tail(>t->list, >t->adev->gtt_list); +error_gart_bind: + spin_unlock(>t->adev->gtt_list_lock); + return r; + +} + static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { struct amdgpu_ttm_tt *gtt = (void*)ttm; - int r; + int r = 0; if (gtt->userptr) { r = amdgpu_ttm_tt_pin_userptr(ttm); @@ -718,7 +809,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, bo_mem->mem_type == AMDGPU_PL_OA) return -EINVAL; - return 0; + if (amdgpu_gtt_mgr_is_allocated(bo_mem)) + r = amdgpu_ttm_do_bind(ttm, bo_mem); + + return r; } bool amdgpu_ttm_is_bound(struct ttm_tt *ttm) @@ -731,8 +825,6 @@ bool amdgpu_ttm_is_bound(struct ttm_tt *ttm) int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) { struct ttm_tt *ttm = bo->ttm; - struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; - uint64_t flags; int r; if (!ttm || amdgpu_ttm_is_bound(ttm)) @@ -745,22 +837,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) return r; } - spin_lock(>t->adev->gtt_list_lock); - flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); - gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; - r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, - ttm->pages, gtt->ttm.dma_address, flags); - - if (r) { - DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", - ttm->num_pages, gtt->offset); - goto error_gart_bind; - } - - list_add_tail(>t->list, >t->adev->gtt_list); -error_gart_bind: - spin_unlock(>t->adev->gtt_list_lock); - return r; + return amdgpu_ttm_do_bind(ttm, bo_mem); } int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) @@ -852,7 +929,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev, static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) { - struct amdgpu_device *adev; + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned i; int r; @@ -875,14 +952,14 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); ttm->state = tt_unbound; - return 0; + r = 0; + goto trace_mappings; } - adev = amdgpu_ttm_adev(ttm->bdev); - #ifdef CONFIG_SWIOTLB if (swiotlb_nr_tbl()) { - return ttm_dma_populate(>t->ttm, adev->dev); + r = ttm_dma_populate(>t->ttm, adev->dev); + goto trace_mappings; } #endif @@ -905,7 +982,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) return -EFAULT; } } - return 0; + + r = 0; +trace_mappings: + if (likely(!r)) + amdgpu_trace_dma_map(ttm); + return r; } static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) @@ -926,6 +1008,8 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) adev = amdgpu_ttm_adev(ttm->bdev); + amdgpu_trace_dma_unmap(ttm); + #ifdef CONFIG_SWIOTLB if (swiotlb_nr_tbl()) { ttm_dma_unpopulate(>t->ttm, adev->dev); @@ -1075,6 +1159,67 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, return ttm_bo_eviction_valuable(bo, place); } +static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, + unsigned long offset, + void *buf, int len, int write) +{ + struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); + struct drm_mm_node *nodes = abo->tbo.mem.mm_node; + uint32_t value = 0; + int ret = 0; + uint64_t pos; + unsigned long flags; + + if (bo->mem.mem_type != TTM_PL_VRAM) + return -EIO; + + while (offset >= (nodes->size << PAGE_SHIFT)) { + offset -= nodes->size << PAGE_SHIFT; + ++nodes; + } + pos = (nodes->start << PAGE_SHIFT) + offset; + + while (len && pos < adev->mc.mc_vram_size) { + uint64_t aligned_pos = pos & ~(uint64_t)3; + uint32_t bytes = 4 - (pos & 3); + uint32_t shift = (pos & 3) * 8; + uint32_t mask = 0xffffffff << shift; + + if (len < bytes) { + mask &= 0xffffffff >> (bytes - len) * 8; + bytes = len; + } + + spin_lock_irqsave(&adev->mmio_idx_lock, flags); + WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000); + WREG32(mmMM_INDEX_HI, aligned_pos >> 31); + if (!write || mask != 0xffffffff) + value = RREG32(mmMM_DATA); + if (write) { + value &= ~mask; + value |= (*(uint32_t *)buf << shift) & mask; + WREG32(mmMM_DATA, value); + } + spin_unlock_irqrestore(&adev->mmio_idx_lock, flags); + if (!write) { + value = (value & mask) >> shift; + memcpy(buf, &value, bytes); + } + + ret += bytes; + buf = (uint8_t *)buf + bytes; + pos += bytes; + len -= bytes; + if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) { + ++nodes; + pos = (nodes->start << PAGE_SHIFT); + } + } + + return ret; +} + static struct ttm_bo_driver amdgpu_bo_driver = { .ttm_tt_create = &amdgpu_ttm_tt_create, .ttm_tt_populate = &amdgpu_ttm_tt_populate, @@ -1090,11 +1235,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_free = &amdgpu_ttm_io_mem_free, .io_mem_pfn = amdgpu_ttm_io_mem_pfn, + .access_memory = &amdgpu_ttm_access_memory }; int amdgpu_ttm_init(struct amdgpu_device *adev) { + uint64_t gtt_size; int r; + u64 vis_vram_limit; r = amdgpu_ttm_global_init(adev); if (r) { @@ -1118,36 +1266,37 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("Failed initializing VRAM heap.\n"); return r; } + + /* Reduce size of CPU-visible VRAM if requested */ + vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024; + if (amdgpu_vis_vram_limit > 0 && + vis_vram_limit <= adev->mc.visible_vram_size) + adev->mc.visible_vram_size = vis_vram_limit; + /* Change the size here instead of the init above so only lpfn is affected */ amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); - r = amdgpu_bo_create(adev, adev->mc.stolen_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &adev->stollen_vga_memory); - if (r) { - return r; - } - r = amdgpu_bo_reserve(adev->stollen_vga_memory, false); + r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->stolen_vga_memory, + NULL, NULL); if (r) return r; - r = amdgpu_bo_pin(adev->stollen_vga_memory, AMDGPU_GEM_DOMAIN_VRAM, NULL); - amdgpu_bo_unreserve(adev->stollen_vga_memory); - if (r) { - amdgpu_bo_unref(&adev->stollen_vga_memory); - return r; - } DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->mc.real_vram_size / (1024 * 1024))); - r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, - adev->mc.gtt_size >> PAGE_SHIFT); + + if (amdgpu_gtt_size == -1) + gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->mc.mc_vram_size); + else + gtt_size = (uint64_t)amdgpu_gtt_size << 20; + r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); if (r) { DRM_ERROR("Failed initializing GTT heap.\n"); return r; } DRM_INFO("amdgpu: %uM of GTT memory ready.\n", - (unsigned)(adev->mc.gtt_size / (1024 * 1024))); + (unsigned)(gtt_size / (1024 * 1024))); adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; @@ -1203,13 +1352,13 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) if (!adev->mman.initialized) return; amdgpu_ttm_debugfs_fini(adev); - if (adev->stollen_vga_memory) { - r = amdgpu_bo_reserve(adev->stollen_vga_memory, true); + if (adev->stolen_vga_memory) { + r = amdgpu_bo_reserve(adev->stolen_vga_memory, true); if (r == 0) { - amdgpu_bo_unpin(adev->stollen_vga_memory); - amdgpu_bo_unreserve(adev->stollen_vga_memory); + amdgpu_bo_unpin(adev->stolen_vga_memory); + amdgpu_bo_unreserve(adev->stolen_vga_memory); } - amdgpu_bo_unref(&adev->stollen_vga_memory); + amdgpu_bo_unref(&adev->stolen_vga_memory); } ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); @@ -1256,12 +1405,77 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) return ttm_bo_mmap(filp, vma, &adev->mman.bdev); } -int amdgpu_copy_buffer(struct amdgpu_ring *ring, - uint64_t src_offset, - uint64_t dst_offset, - uint32_t byte_count, +static int amdgpu_map_buffer(struct ttm_buffer_object *bo, + struct ttm_mem_reg *mem, unsigned num_pages, + uint64_t offset, unsigned window, + struct amdgpu_ring *ring, + uint64_t *addr) +{ + struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; + struct amdgpu_device *adev = ring->adev; + struct ttm_tt *ttm = bo->ttm; + struct amdgpu_job *job; + unsigned num_dw, num_bytes; + dma_addr_t *dma_address; + struct dma_fence *fence; + uint64_t src_addr, dst_addr; + uint64_t flags; + int r; + + BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < + AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); + + *addr = adev->mc.gart_start; + *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * + AMDGPU_GPU_PAGE_SIZE; + + num_dw = adev->mman.buffer_funcs->copy_num_dw; + while (num_dw & 0x7) + num_dw++; + + num_bytes = num_pages * 8; + + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job); + if (r) + return r; + + src_addr = num_dw * 4; + src_addr += job->ibs[0].gpu_addr; + + dst_addr = adev->gart.table_addr; + dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, + dst_addr, num_bytes); + + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + + dma_address = >t->ttm.dma_address[offset >> PAGE_SHIFT]; + flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem); + r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, + &job->ibs[0].ptr[num_dw]); + if (r) + goto error_free; + + r = amdgpu_job_submit(job, ring, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_free; + + dma_fence_put(fence); + + return r; + +error_free: + amdgpu_job_free(job); + return r; +} + +int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, + uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, - struct dma_fence **fence, bool direct_submit) + struct dma_fence **fence, bool direct_submit, + bool vm_needs_flush) { struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; @@ -1283,6 +1497,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, if (r) return r; + job->vm_needs_flush = vm_needs_flush; if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, AMDGPU_FENCE_OWNER_UNDEFINED); @@ -1327,11 +1542,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, } int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, + uint64_t src_data, struct reservation_object *resv, struct dma_fence **fence) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + /* max_bytes applies to SDMA_OP_PTEPDE as well as SDMA_OP_CONST_FILL*/ uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -1347,6 +1563,12 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, return -EINVAL; } + if (bo->tbo.mem.mem_type == TTM_PL_TT) { + r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); + if (r) + return r; + } + num_pages = bo->tbo.num_pages; mm_node = bo->tbo.mem.mm_node; num_loops = 0; @@ -1357,7 +1579,9 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, num_pages -= mm_node->size; ++mm_node; } - num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw; + + /* 10 double words for each SDMA_OP_PTEPDE cmd */ + num_dw = num_loops * 10; /* for IB padding */ num_dw += 64; @@ -1382,16 +1606,16 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t byte_count = mm_node->size << PAGE_SHIFT; uint64_t dst_addr; - r = amdgpu_mm_node_addr(&bo->tbo, mm_node, - &bo->tbo.mem, &dst_addr); - if (r) - return r; + WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8"); + dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem); while (byte_count) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); - amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, - dst_addr, cur_size_in_bytes); + amdgpu_vm_set_pte_pde(adev, &job->ibs[0], + dst_addr, 0, + cur_size_in_bytes >> 3, 0, + src_data); dst_addr += cur_size_in_bytes; byte_count -= cur_size_in_bytes; @@ -1417,32 +1641,16 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, #if defined(CONFIG_DEBUG_FS) -extern void amdgpu_gtt_mgr_print(struct seq_file *m, struct ttm_mem_type_manager - *man); static int amdgpu_mm_dump_table(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *)m->private; unsigned ttm_pl = *(int *)node->info_ent->data; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - struct drm_mm *mm = (struct drm_mm *)adev->mman.bdev.man[ttm_pl].priv; - struct ttm_bo_global *glob = adev->mman.bdev.glob; + struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl]; struct drm_printer p = drm_seq_file_printer(m); - spin_lock(&glob->lru_lock); - drm_mm_print(mm, &p); - spin_unlock(&glob->lru_lock); - switch (ttm_pl) { - case TTM_PL_VRAM: - seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", - adev->mman.bdev.man[ttm_pl].size, - (u64)atomic64_read(&adev->vram_usage) >> 20, - (u64)atomic64_read(&adev->vram_vis_usage) >> 20); - break; - case TTM_PL_TT: - amdgpu_gtt_mgr_print(m, &adev->mman.bdev.man[TTM_PL_TT]); - break; - } + man->func->debug(man, &p); return 0; } @@ -1574,7 +1782,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) adev, &amdgpu_ttm_gtt_fops); if (IS_ERR(ent)) return PTR_ERR(ent); - i_size_write(ent->d_inode, adev->mc.gtt_size); + i_size_write(ent->d_inode, adev->mc.gart_size); adev->mman.gtt = ent; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6bdede8ff12b4c37f1e679de83f7afd54014cc0b..f22a4758719da1121bd9368b857cb14fa42bb958 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -34,6 +34,9 @@ #define AMDGPU_PL_FLAG_GWS (TTM_PL_FLAG_PRIV << 1) #define AMDGPU_PL_FLAG_OA (TTM_PL_FLAG_PRIV << 2) +#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 +#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 + struct amdgpu_mman { struct ttm_bo_global_ref bo_global_ref; struct drm_global_reference mem_global_ref; @@ -49,6 +52,8 @@ struct amdgpu_mman { /* buffer handling */ const struct amdgpu_buffer_funcs *buffer_funcs; struct amdgpu_ring *buffer_funcs_ring; + + struct mutex gtt_window_lock; /* Scheduler entity for buffer moves */ struct amd_sched_entity entity; }; @@ -56,24 +61,29 @@ struct amdgpu_mman { extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; +bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem); int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, struct ttm_buffer_object *tbo, const struct ttm_place *place, struct ttm_mem_reg *mem); +uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man); + +uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); +uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); -int amdgpu_copy_buffer(struct amdgpu_ring *ring, - uint64_t src_offset, - uint64_t dst_offset, - uint32_t byte_count, +int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, + uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, - struct dma_fence **fence, bool direct_submit); + struct dma_fence **fence, bool direct_submit, + bool vm_needs_flush); int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, + uint64_t src_data, struct reservation_object *resv, struct dma_fence **fence); int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); bool amdgpu_ttm_is_bound(struct ttm_tt *ttm); int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem); +int amdgpu_ttm_recover_gart(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 4f50eeb6585534b54a5408360611798ccd2e447f..36c763310df5f6402fc3d28be2f2ab5b85245f9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -275,14 +275,10 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) else return AMDGPU_FW_LOAD_PSP; case CHIP_RAVEN: -#if 0 - if (!load_type) + if (load_type != 2) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; -#else - return AMDGPU_FW_LOAD_DIRECT; -#endif default: DRM_ERROR("Unknow firmware load type\n"); } @@ -362,8 +358,6 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, (le32_to_cpu(header->jt_offset) * 4); memcpy(dst_addr, src_addr, le32_to_cpu(header->jt_size) * 4); - ucode->ucode_size += le32_to_cpu(header->jt_size) * 4; - return 0; } @@ -377,10 +371,15 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) struct amdgpu_firmware_info *ucode = NULL; const struct common_firmware_header *header = NULL; + if (!adev->firmware.fw_size) { + dev_warn(adev->dev, "No ip firmware need to load\n"); + return 0; + } + err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, bo); + NULL, NULL, 0, bo); if (err) { dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); goto failed; @@ -459,6 +458,9 @@ int amdgpu_ucode_fini_bo(struct amdgpu_device *adev) int i; struct amdgpu_firmware_info *ucode = NULL; + if (!adev->firmware.fw_size) + return 0; + for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; if (ucode->fw) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 2ca09f111f08d02838cea38e3d7148752c6ffc0b..e19928dae8e3fa92d48c3047a1bda98107e6b595 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -588,6 +588,10 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, } break; + case 8: /* MJPEG */ + min_dpb_size = 0; + break; + case 16: /* H265 */ image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2; image_size = ALIGN(image_size, 256); @@ -1051,7 +1055,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &bo); + NULL, NULL, 0, &bo); if (r) return r; @@ -1101,7 +1105,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &bo); + NULL, NULL, 0, &bo); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index b692ad4022521a5244f23e846430898a3a3bd9d4..c855366521abc527d6ed9188ae664892aa94d8ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -937,9 +937,9 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r, timeout = adev->usec_timeout; - /* workaround VCE ring test slow issue for sriov*/ + /* skip ring test for sriov*/ if (amdgpu_sriov_vf(adev)) - timeout *= 10; + return 0; r = amdgpu_ring_alloc(ring, 16); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 09190fadd2288c011c4cf82e67d744825334450e..041e0121590c96bb34e17444d32ec0186efeb539 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -209,9 +209,9 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) if (fences == 0) { if (adev->pm.dpm_enabled) { + /* might be used when with pg/cg amdgpu_dpm_enable_uvd(adev, false); - } else { - amdgpu_asic_set_uvd_clocks(adev, 0, 0); + */ } } else { schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); @@ -223,12 +223,10 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); - if (set_clocks) { - if (adev->pm.dpm_enabled) { - amdgpu_dpm_enable_uvd(adev, true); - } else { - amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); - } + if (set_clocks && adev->pm.dpm_enabled) { + /* might be used when with pg/cg + amdgpu_dpm_enable_uvd(adev, true); + */ } } @@ -361,7 +359,7 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &bo); + NULL, NULL, 0, &bo); if (r) return r; @@ -413,7 +411,7 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &bo); + NULL, NULL, 0, &bo); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c new file mode 100644 index 0000000000000000000000000000000000000000..45ac918619658a34acdfdbbbbda2002a37163c58 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c @@ -0,0 +1,85 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_vf_error.h" +#include "mxgpu_ai.h" + +#define AMDGPU_VF_ERROR_ENTRY_SIZE 16 + +/* struct error_entry - amdgpu VF error information. */ +struct amdgpu_vf_error_buffer { + int read_count; + int write_count; + uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE]; + uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE]; + uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; +}; + +struct amdgpu_vf_error_buffer admgpu_vf_errors; + + +void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data) +{ + int index; + uint16_t error_code = AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF, sub_error_code); + + index = admgpu_vf_errors.write_count % AMDGPU_VF_ERROR_ENTRY_SIZE; + admgpu_vf_errors.code [index] = error_code; + admgpu_vf_errors.flags [index] = error_flags; + admgpu_vf_errors.data [index] = error_data; + admgpu_vf_errors.write_count ++; +} + + +void amdgpu_vf_error_trans_all(struct amdgpu_device *adev) +{ + /* u32 pf2vf_flags = 0; */ + u32 data1, data2, data3; + int index; + + if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) || (!adev->virt.ops) || (!adev->virt.ops->trans_msg)) { + return; + } +/* + TODO: Enable these code when pv2vf_info is merged + AMDGPU_FW_VRAM_PF2VF_READ (adev, feature_flags, &pf2vf_flags); + if (!(pf2vf_flags & AMDGIM_FEATURE_ERROR_LOG_COLLECT)) { + return; + } +*/ + /* The errors are overlay of array, correct read_count as full. */ + if (admgpu_vf_errors.write_count - admgpu_vf_errors.read_count > AMDGPU_VF_ERROR_ENTRY_SIZE) { + admgpu_vf_errors.read_count = admgpu_vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE; + } + + while (admgpu_vf_errors.read_count < admgpu_vf_errors.write_count) { + index =admgpu_vf_errors.read_count % AMDGPU_VF_ERROR_ENTRY_SIZE; + data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX (admgpu_vf_errors.code[index], admgpu_vf_errors.flags[index]); + data2 = admgpu_vf_errors.data[index] & 0xFFFFFFFF; + data3 = (admgpu_vf_errors.data[index] >> 32) & 0xFFFFFFFF; + + adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR, data1, data2, data3); + admgpu_vf_errors.read_count ++; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h new file mode 100644 index 0000000000000000000000000000000000000000..2a3278ec76bad2134e1c471e5366fdbebf27716b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.h @@ -0,0 +1,62 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VF_ERROR_H__ +#define __VF_ERROR_H__ + +#define AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(c,f) (((c & 0xFFFF) << 16) | (f & 0xFFFF)) +#define AMDGIM_ERROR_CODE(t,c) (((t&0xF)<<12)|(c&0xFFF)) + +/* Please keep enum same as AMD GIM driver */ +enum AMDGIM_ERROR_VF { + AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL = 0, + AMDGIM_ERROR_VF_NO_VBIOS, + AMDGIM_ERROR_VF_GPU_POST_ERROR, + AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, + AMDGIM_ERROR_VF_FENCE_INIT_FAIL, + + AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, + AMDGIM_ERROR_VF_IB_INIT_FAIL, + AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, + AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, + AMDGIM_ERROR_VF_GPU_RESET_FAIL, + + AMDGIM_ERROR_VF_TEST, + AMDGIM_ERROR_VF_MAX +}; + +enum AMDGIM_ERROR_CATEGORY { + AMDGIM_ERROR_CATEGORY_NON_USED = 0, + AMDGIM_ERROR_CATEGORY_GIM, + AMDGIM_ERROR_CATEGORY_PF, + AMDGIM_ERROR_CATEGORY_VF, + AMDGIM_ERROR_CATEGORY_VBIOS, + AMDGIM_ERROR_CATEGORY_MONITOR, + + AMDGIM_ERROR_CATEGORY_MAX +}; + +void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags, uint64_t error_data); +void amdgpu_vf_error_trans_all (struct amdgpu_device *adev); + +#endif /* __VF_ERROR_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 8a081e162d13cb6f6cc6b674fd82cd52180cb40c..ab05121b9272b9f11bf7ab52aa0ca085149aeeca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -46,14 +46,14 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev) * address within META_DATA init package to support SRIOV gfx preemption. */ -int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm) +int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va) { - int r; - struct amdgpu_bo_va *bo_va; struct ww_acquire_ctx ticket; struct list_head list; struct amdgpu_bo_list_entry pd; struct ttm_validate_buffer csa_tv; + int r; INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&csa_tv.head); @@ -69,34 +69,33 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm) return r; } - bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); - if (!bo_va) { + *bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); + if (!*bo_va) { ttm_eu_backoff_reservation(&ticket, &list); DRM_ERROR("failed to create bo_va for static CSA\n"); return -ENOMEM; } - r = amdgpu_vm_alloc_pts(adev, bo_va->vm, AMDGPU_CSA_VADDR, - AMDGPU_CSA_SIZE); + r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR, + AMDGPU_CSA_SIZE); if (r) { DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, bo_va); + amdgpu_vm_bo_rmv(adev, *bo_va); ttm_eu_backoff_reservation(&ticket, &list); return r; } - r = amdgpu_vm_bo_map(adev, bo_va, AMDGPU_CSA_VADDR, 0,AMDGPU_CSA_SIZE, - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); + r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE, + AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | + AMDGPU_PTE_EXECUTABLE); if (r) { DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, bo_va); + amdgpu_vm_bo_rmv(adev, *bo_va); ttm_eu_backoff_reservation(&ticket, &list); return r; } - vm->csa_bo_va = bo_va; ttm_eu_backoff_reservation(&ticket, &list); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 9e1062edb76eb0649a0a97291c320222fd21bfa9..afcfb8bcfb65edda5e37def8ace207e0ded84ae1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -43,6 +43,7 @@ struct amdgpu_virt_ops { int (*req_full_gpu)(struct amdgpu_device *adev, bool init); int (*rel_full_gpu)(struct amdgpu_device *adev, bool init); int (*reset_gpu)(struct amdgpu_device *adev); + void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); }; /* GPU virtualization */ @@ -89,7 +90,8 @@ static inline bool is_virtual_machine(void) struct amdgpu_vm; int amdgpu_allocate_static_csa(struct amdgpu_device *adev); -int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va); void amdgpu_virt_init_setting(struct amdgpu_device *adev); uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5795f81369f0fc6e68f0659c400a700091713db8..6b1343e5541d3f2a2c0415cd33593c6c1f188a08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -77,8 +77,6 @@ struct amdgpu_pte_update_params { void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); - /* indicate update pt or its shadow */ - bool shadow; /* The next two are used during VM update by CPU * DMA addresses to use for mapping * Kernel pointer of PD/PT BO that needs to be updated @@ -161,11 +159,26 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, */ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, int (*validate)(void *, struct amdgpu_bo *), - void *param) + void *param, bool use_cpu_for_update, + struct ttm_bo_global *glob) { unsigned i; int r; + if (parent->bo->shadow) { + struct amdgpu_bo *shadow = parent->bo->shadow; + + r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); + if (r) + return r; + } + + if (use_cpu_for_update) { + r = amdgpu_bo_kmap(parent->bo, NULL); + if (r) + return r; + } + if (!parent->entries) return 0; @@ -179,11 +192,18 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, if (r) return r; + spin_lock(&glob->lru_lock); + ttm_bo_move_to_lru_tail(&entry->bo->tbo); + if (entry->bo->shadow) + ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo); + spin_unlock(&glob->lru_lock); + /* * Recurse into the sub directory. This is harmless because we * have only a maximum of 5 layers. */ - r = amdgpu_vm_validate_level(entry, validate, param); + r = amdgpu_vm_validate_level(entry, validate, param, + use_cpu_for_update, glob); if (r) return r; } @@ -214,54 +234,12 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (num_evictions == vm->last_eviction_counter) return 0; - return amdgpu_vm_validate_level(&vm->root, validate, param); -} - -/** - * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail - * - * @adev: amdgpu device instance - * @vm: vm providing the BOs - * - * Move the PT BOs to the tail of the LRU. - */ -static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent) -{ - unsigned i; - - if (!parent->entries) - return; - - for (i = 0; i <= parent->last_entry_used; ++i) { - struct amdgpu_vm_pt *entry = &parent->entries[i]; - - if (!entry->bo) - continue; - - ttm_bo_move_to_lru_tail(&entry->bo->tbo); - amdgpu_vm_move_level_in_lru(entry); - } + return amdgpu_vm_validate_level(&vm->root, validate, param, + vm->use_cpu_for_update, + adev->mman.bdev.glob); } /** - * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail - * - * @adev: amdgpu device instance - * @vm: vm providing the BOs - * - * Move the PT BOs to the tail of the LRU. - */ -void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - struct ttm_bo_global *glob = adev->mman.bdev.glob; - - spin_lock(&glob->lru_lock); - amdgpu_vm_move_level_in_lru(&vm->root); - spin_unlock(&glob->lru_lock); -} - - /** * amdgpu_vm_alloc_levels - allocate the PD/PT levels * * @adev: amdgpu_device pointer @@ -282,6 +260,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, unsigned pt_idx, from, to; int r; u64 flags; + uint64_t init_value = 0; if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); @@ -315,6 +294,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_SHADOW); + if (vm->pte_support_ats) { + init_value = AMDGPU_PTE_SYSTEM; + if (level != adev->vm_manager.num_level - 1) + init_value |= AMDGPU_PDE_PTE; + } + /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { struct reservation_object *resv = vm->root.bo->tbo.resv; @@ -327,10 +312,18 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, flags, - NULL, resv, &pt); + NULL, resv, init_value, &pt); if (r) return r; + if (vm->use_cpu_for_update) { + r = amdgpu_bo_kmap(pt, NULL); + if (r) { + amdgpu_bo_unref(&pt); + return r; + } + } + /* Keep a reference to the root directory to avoid * freeing them up in the wrong order. */ @@ -424,7 +417,7 @@ static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, struct dma_fence *updates = sync->last_vm_update; int r = 0; struct dma_fence *flushed, *tmp; - bool needs_flush = false; + bool needs_flush = vm->use_cpu_for_update; flushed = id->flushed_updates; if ((amdgpu_vm_had_gpu_reset(adev, id)) || @@ -545,11 +538,11 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, } kfree(fences); - job->vm_needs_flush = false; + job->vm_needs_flush = vm->use_cpu_for_update; /* Check if we can use a VMID already assigned to this VM */ list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { struct dma_fence *flushed; - bool needs_flush = false; + bool needs_flush = vm->use_cpu_for_update; /* Check all the prerequisites to using this VMID */ if (amdgpu_vm_had_gpu_reset(adev, id)) @@ -745,7 +738,7 @@ static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) * * Emit a VM flush when it is necessary. */ -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -767,12 +760,15 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) vm_flush_needed = true; } - if (!vm_flush_needed && !gds_switch_needed) + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); + if (need_pipe_sync) + amdgpu_ring_emit_pipeline_sync(ring); + if (ring->funcs->emit_vm_flush && vm_flush_needed) { struct dma_fence *fence; @@ -874,8 +870,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, { struct amdgpu_bo_va *bo_va; - list_for_each_entry(bo_va, &bo->va, bo_list) { - if (bo_va->vm == vm) { + list_for_each_entry(bo_va, &bo->va, base.bo_list) { + if (bo_va->base.vm == vm) { return bo_va; } } @@ -981,6 +977,8 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, unsigned int i; uint64_t value; + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + for (i = 0; i < count; i++) { value = params->pages_addr ? amdgpu_vm_map_gart(params->pages_addr, addr) : @@ -989,19 +987,16 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, i, value, flags); addr += incr; } - - /* Flush HDP */ - mb(); - amdgpu_gart_flush_gpu_tlb(params->adev, 0); } -static int amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo) +static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, + void *owner) { struct amdgpu_sync sync; int r; amdgpu_sync_create(&sync); - amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM); + amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner); r = amdgpu_sync_wait(&sync, true); amdgpu_sync_free(&sync); @@ -1042,23 +1037,14 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, params.adev = adev; shadow = parent->bo->shadow; - WARN_ON(vm->use_cpu_for_update && shadow); - if (vm->use_cpu_for_update && !shadow) { - r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr); - if (r) - return r; - r = amdgpu_vm_bo_wait(adev, parent->bo); - if (unlikely(r)) { - amdgpu_bo_kunmap(parent->bo); + if (vm->use_cpu_for_update) { + pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); + r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); + if (unlikely(r)) return r; - } + params.func = amdgpu_vm_cpu_set_ptes; } else { - if (shadow) { - r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); - if (r) - return r; - } ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); @@ -1094,21 +1080,14 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, if (bo == NULL) continue; - if (bo->shadow) { - struct amdgpu_bo *pt_shadow = bo->shadow; - - r = amdgpu_ttm_bind(&pt_shadow->tbo, - &pt_shadow->tbo.mem); - if (r) - return r; - } - pt = amdgpu_bo_gpu_offset(bo); pt = amdgpu_gart_get_vm_pde(adev, pt); - if (parent->entries[pt_idx].addr == pt) + /* Don't update huge pages here */ + if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) || + parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID)) continue; - parent->entries[pt_idx].addr = pt; + parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; pde = pd_addr + pt_idx * 8; if (((last_pde + 8 * count) != pde) || @@ -1146,28 +1125,29 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, count, incr, AMDGPU_PTE_VALID); } - if (params.func == amdgpu_vm_cpu_set_ptes) - amdgpu_bo_kunmap(parent->bo); - else if (params.ib->length_dw == 0) { - amdgpu_job_free(job); - } else { - amdgpu_ring_pad_ib(ring, params.ib); - amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, - AMDGPU_FENCE_OWNER_VM); - if (shadow) - amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv, + if (!vm->use_cpu_for_update) { + if (params.ib->length_dw == 0) { + amdgpu_job_free(job); + } else { + amdgpu_ring_pad_ib(ring, params.ib); + amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, AMDGPU_FENCE_OWNER_VM); + if (shadow) + amdgpu_sync_resv(adev, &job->sync, + shadow->tbo.resv, + AMDGPU_FENCE_OWNER_VM); + + WARN_ON(params.ib->length_dw > ndw); + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_VM, &fence); + if (r) + goto error_free; - WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, ring, &vm->entity, - AMDGPU_FENCE_OWNER_VM, &fence); - if (r) - goto error_free; - - amdgpu_bo_fence(parent->bo, fence, true); - dma_fence_put(vm->last_dir_update); - vm->last_dir_update = dma_fence_get(fence); - dma_fence_put(fence); + amdgpu_bo_fence(parent->bo, fence, true); + dma_fence_put(vm->last_dir_update); + vm->last_dir_update = dma_fence_get(fence); + dma_fence_put(fence); + } } /* * Recurse into the subdirectories. This recursion is harmless because @@ -1235,33 +1215,98 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, if (r) amdgpu_vm_invalidate_level(&vm->root); + if (vm->use_cpu_for_update) { + /* Flush HDP */ + mb(); + amdgpu_gart_flush_gpu_tlb(adev, 0); + } + return r; } /** - * amdgpu_vm_find_pt - find the page table for an address + * amdgpu_vm_find_entry - find the entry for an address * * @p: see amdgpu_pte_update_params definition * @addr: virtual address in question + * @entry: resulting entry or NULL + * @parent: parent entry * - * Find the page table BO for a virtual address, return NULL when none found. + * Find the vm_pt entry and it's parent for the given address. */ -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p, - uint64_t addr) +void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, + struct amdgpu_vm_pt **entry, + struct amdgpu_vm_pt **parent) { - struct amdgpu_vm_pt *entry = &p->vm->root; unsigned idx, level = p->adev->vm_manager.num_level; - while (entry->entries) { + *parent = NULL; + *entry = &p->vm->root; + while ((*entry)->entries) { idx = addr >> (p->adev->vm_manager.block_size * level--); - idx %= amdgpu_bo_size(entry->bo) / 8; - entry = &entry->entries[idx]; + idx %= amdgpu_bo_size((*entry)->bo) / 8; + *parent = *entry; + *entry = &(*entry)->entries[idx]; } if (level) - return NULL; + *entry = NULL; +} + +/** + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages + * + * @p: see amdgpu_pte_update_params definition + * @entry: vm_pt entry to check + * @parent: parent entry + * @nptes: number of PTEs updated with this operation + * @dst: destination address where the PTEs should point to + * @flags: access flags fro the PTEs + * + * Check if we can update the PD with a huge page. + */ +static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, + struct amdgpu_vm_pt *entry, + struct amdgpu_vm_pt *parent, + unsigned nptes, uint64_t dst, + uint64_t flags) +{ + bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); + uint64_t pd_addr, pde; + + /* In the case of a mixed PT the PDE must point to it*/ + if (p->adev->asic_type < CHIP_VEGA10 || + nptes != AMDGPU_VM_PTE_COUNT(p->adev) || + p->func == amdgpu_vm_do_copy_ptes || + !(flags & AMDGPU_PTE_VALID)) { + + dst = amdgpu_bo_gpu_offset(entry->bo); + dst = amdgpu_gart_get_vm_pde(p->adev, dst); + flags = AMDGPU_PTE_VALID; + } else { + /* Set the huge page flag to stop scanning at this PDE */ + flags |= AMDGPU_PDE_PTE; + } + + if (entry->addr == (dst | flags)) + return; + + entry->addr = (dst | flags); - return entry->bo; + if (use_cpu_update) { + pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); + } else { + if (parent->bo->shadow) { + pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); + } + pd_addr = amdgpu_bo_gpu_offset(parent->bo); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); + } } /** @@ -1287,49 +1332,44 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, uint64_t addr, pe_start; struct amdgpu_bo *pt; unsigned nptes; - int r; bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); - /* walk over the address space and update the page tables */ - for (addr = start; addr < end; addr += nptes) { - pt = amdgpu_vm_get_pt(params, addr); - if (!pt) { - pr_err("PT not found, aborting update_ptes\n"); - return -EINVAL; - } + for (addr = start; addr < end; addr += nptes, + dst += nptes * AMDGPU_GPU_PAGE_SIZE) { + struct amdgpu_vm_pt *entry, *parent; - if (params->shadow) { - if (WARN_ONCE(use_cpu_update, - "CPU VM update doesn't suuport shadow pages")) - return 0; - - if (!pt->shadow) - return 0; - pt = pt->shadow; - } + amdgpu_vm_get_entry(params, addr, &entry, &parent); + if (!entry) + return -ENOENT; if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; else nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); + amdgpu_vm_handle_huge_pages(params, entry, parent, + nptes, dst, flags); + /* We don't need to update PTEs for huge pages */ + if (entry->addr & AMDGPU_PDE_PTE) + continue; + + pt = entry->bo; if (use_cpu_update) { - r = amdgpu_bo_kmap(pt, (void *)&pe_start); - if (r) - return r; - } else + pe_start = (unsigned long)amdgpu_bo_kptr(pt); + } else { + if (pt->shadow) { + pe_start = amdgpu_bo_gpu_offset(pt->shadow); + pe_start += (addr & mask) * 8; + params->func(params, pe_start, dst, nptes, + AMDGPU_GPU_PAGE_SIZE, flags); + } pe_start = amdgpu_bo_gpu_offset(pt); + } pe_start += (addr & mask) * 8; - params->func(params, pe_start, dst, nptes, AMDGPU_GPU_PAGE_SIZE, flags); - - dst += nptes * AMDGPU_GPU_PAGE_SIZE; - - if (use_cpu_update) - amdgpu_bo_kunmap(pt); } return 0; @@ -1370,10 +1410,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, * Userspace can support this by aligning virtual base address and * allocation size to the fragment size. */ - - /* SI and newer are optimized for 64KB */ - uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); - uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; + unsigned pages_per_frag = params->adev->vm_manager.fragment_size; + uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); + uint64_t frag_align = 1 << pages_per_frag; uint64_t frag_start = ALIGN(start, frag_align); uint64_t frag_end = end & ~(frag_align - 1); @@ -1445,6 +1484,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, params.vm = vm; params.src = src; + /* sync to everything on unmapping */ + if (!(flags & AMDGPU_PTE_VALID)) + owner = AMDGPU_FENCE_OWNER_UNDEFINED; + if (vm->use_cpu_for_update) { /* params.src is used as flag to indicate system Memory */ if (pages_addr) @@ -1453,23 +1496,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* Wait for PT BOs to be free. PTs share the same resv. object * as the root PD BO */ - r = amdgpu_vm_bo_wait(adev, vm->root.bo); + r = amdgpu_vm_wait_pd(adev, vm, owner); if (unlikely(r)) return r; params.func = amdgpu_vm_cpu_set_ptes; params.pages_addr = pages_addr; - params.shadow = false; return amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); } ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - /* sync to everything on unmapping */ - if (!(flags & AMDGPU_PTE_VALID)) - owner = AMDGPU_FENCE_OWNER_UNDEFINED; - nptes = last - start + 1; /* @@ -1481,6 +1519,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* padding, etc. */ ndw = 64; + /* one PDE write for each huge page */ + ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6; + if (src) { /* only copy commands needed */ ndw += ncmds * 7; @@ -1542,11 +1583,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - params.shadow = true; - r = amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); - if (r) - goto error_free; - params.shadow = false; r = amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); if (r) goto error_free; @@ -1565,6 +1601,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, error_free: amdgpu_job_free(job); + amdgpu_vm_invalidate_level(&vm->root); return r; } @@ -1687,7 +1724,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear) { - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo *bo = bo_va->base.bo; + struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_bo_va_mapping *mapping; dma_addr_t *pages_addr = NULL; uint64_t gtt_flags, flags; @@ -1696,27 +1734,27 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct dma_fence *exclusive; int r; - if (clear || !bo_va->bo) { + if (clear || !bo_va->base.bo) { mem = NULL; nodes = NULL; exclusive = NULL; } else { struct ttm_dma_tt *ttm; - mem = &bo_va->bo->tbo.mem; + mem = &bo_va->base.bo->tbo.mem; nodes = mem->mm_node; if (mem->mem_type == TTM_PL_TT) { - ttm = container_of(bo_va->bo->tbo.ttm, struct - ttm_dma_tt, ttm); + ttm = container_of(bo_va->base.bo->tbo.ttm, + struct ttm_dma_tt, ttm); pages_addr = ttm->dma_address; } - exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); + exclusive = reservation_object_get_excl(bo->tbo.resv); } - if (bo_va->bo) { - flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); - gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && - adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ? + if (bo) { + flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); + gtt_flags = (amdgpu_ttm_is_bound(bo->tbo.ttm) && + adev == amdgpu_ttm_adev(bo->tbo.bdev)) ? flags : 0; } else { flags = 0x0; @@ -1724,7 +1762,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, } spin_lock(&vm->status_lock); - if (!list_empty(&bo_va->vm_status)) + if (!list_empty(&bo_va->base.vm_status)) list_splice_init(&bo_va->valids, &bo_va->invalids); spin_unlock(&vm->status_lock); @@ -1747,11 +1785,17 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_lock(&vm->status_lock); list_splice_init(&bo_va->invalids, &bo_va->valids); - list_del_init(&bo_va->vm_status); + list_del_init(&bo_va->base.vm_status); if (clear) - list_add(&bo_va->vm_status, &vm->cleared); + list_add(&bo_va->base.vm_status, &vm->cleared); spin_unlock(&vm->status_lock); + if (vm->use_cpu_for_update) { + /* Flush HDP */ + mb(); + amdgpu_gart_flush_gpu_tlb(adev, 0); + } + return 0; } @@ -1905,15 +1949,19 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping; struct dma_fence *f = NULL; int r; + uint64_t init_pte_value = 0; while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); + if (vm->pte_support_ats) + init_pte_value = AMDGPU_PTE_SYSTEM; + r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm, mapping->start, mapping->last, - 0, 0, &f); + init_pte_value, 0, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -1933,26 +1981,26 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, } /** - * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT + * amdgpu_vm_clear_moved - clear moved BOs in the PT * * @adev: amdgpu_device pointer * @vm: requested vm * - * Make sure all invalidated BOs are cleared in the PT. + * Make sure all moved BOs are cleared in the PT. * Returns 0 for success. * * PTs have to be reserved and mutex must be locked! */ -int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, - struct amdgpu_vm *vm, struct amdgpu_sync *sync) +int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_sync *sync) { struct amdgpu_bo_va *bo_va = NULL; int r = 0; spin_lock(&vm->status_lock); - while (!list_empty(&vm->invalidated)) { - bo_va = list_first_entry(&vm->invalidated, - struct amdgpu_bo_va, vm_status); + while (!list_empty(&vm->moved)) { + bo_va = list_first_entry(&vm->moved, + struct amdgpu_bo_va, base.vm_status); spin_unlock(&vm->status_lock); r = amdgpu_vm_bo_update(adev, bo_va, true); @@ -1992,16 +2040,17 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, if (bo_va == NULL) { return NULL; } - bo_va->vm = vm; - bo_va->bo = bo; + bo_va->base.vm = vm; + bo_va->base.bo = bo; + INIT_LIST_HEAD(&bo_va->base.bo_list); + INIT_LIST_HEAD(&bo_va->base.vm_status); + bo_va->ref_count = 1; - INIT_LIST_HEAD(&bo_va->bo_list); INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); - INIT_LIST_HEAD(&bo_va->vm_status); if (bo) - list_add_tail(&bo_va->bo_list, &bo->va); + list_add_tail(&bo_va->base.bo_list, &bo->va); return bo_va; } @@ -2026,7 +2075,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, uint64_t size, uint64_t flags) { struct amdgpu_bo_va_mapping *mapping, *tmp; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo *bo = bo_va->base.bo; + struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; /* validate the parameters */ @@ -2037,7 +2087,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, /* make sure object fit at this offset */ eaddr = saddr + size - 1; if (saddr >= eaddr || - (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) + (bo && offset + size > amdgpu_bo_size(bo))) return -EINVAL; saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -2047,7 +2097,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, if (tmp) { /* bo and tmp overlap, invalid addr */ dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " - "0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr, + "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr, tmp->start, tmp->last + 1); return -EINVAL; } @@ -2092,7 +2142,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t size, uint64_t flags) { struct amdgpu_bo_va_mapping *mapping; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo *bo = bo_va->base.bo; + struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; int r; @@ -2104,7 +2155,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, /* make sure object fit at this offset */ eaddr = saddr + size - 1; if (saddr >= eaddr || - (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) + (bo && offset + size > amdgpu_bo_size(bo))) return -EINVAL; /* Allocate all the needed memory */ @@ -2112,7 +2163,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, if (!mapping) return -ENOMEM; - r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size); + r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size); if (r) { kfree(mapping); return r; @@ -2152,7 +2203,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, uint64_t saddr) { struct amdgpu_bo_va_mapping *mapping; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_vm *vm = bo_va->base.vm; bool valid = true; saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -2300,12 +2351,12 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va) { struct amdgpu_bo_va_mapping *mapping, *next; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_vm *vm = bo_va->base.vm; - list_del(&bo_va->bo_list); + list_del(&bo_va->base.bo_list); spin_lock(&vm->status_lock); - list_del(&bo_va->vm_status); + list_del(&bo_va->base.vm_status); spin_unlock(&vm->status_lock); list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { @@ -2337,13 +2388,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo) { - struct amdgpu_bo_va *bo_va; + struct amdgpu_vm_bo_base *bo_base; - list_for_each_entry(bo_va, &bo->va, bo_list) { - spin_lock(&bo_va->vm->status_lock); - if (list_empty(&bo_va->vm_status)) - list_add(&bo_va->vm_status, &bo_va->vm->invalidated); - spin_unlock(&bo_va->vm->status_lock); + list_for_each_entry(bo_base, &bo->va, bo_list) { + spin_lock(&bo_base->vm->status_lock); + if (list_empty(&bo_base->vm_status)) + list_add(&bo_base->vm_status, + &bo_base->vm->moved); + spin_unlock(&bo_base->vm->status_lock); } } @@ -2361,12 +2413,26 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) } /** - * amdgpu_vm_adjust_size - adjust vm size and block size + * amdgpu_vm_set_fragment_size - adjust fragment size in PTE + * + * @adev: amdgpu_device pointer + * @fragment_size_default: the default fragment size if it's set auto + */ +void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default) +{ + if (amdgpu_vm_fragment_size == -1) + adev->vm_manager.fragment_size = fragment_size_default; + else + adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; +} + +/** + * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size * * @adev: amdgpu_device pointer * @vm_size: the default vm size if it's set auto */ -void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default) { /* adjust vm size firstly */ if (amdgpu_vm_size == -1) @@ -2381,8 +2447,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) else adev->vm_manager.block_size = amdgpu_vm_block_size; - DRM_INFO("vm size is %llu GB, block size is %u-bit\n", - adev->vm_manager.vm_size, adev->vm_manager.block_size); + amdgpu_vm_set_fragment_size(adev, fragment_size_default); + + DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n", + adev->vm_manager.vm_size, adev->vm_manager.block_size, + adev->vm_manager.fragment_size); } /** @@ -2404,13 +2473,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amd_sched_rq *rq; int r, i; u64 flags; + uint64_t init_pde_value = 0; vm->va = RB_ROOT; vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; spin_lock_init(&vm->status_lock); - INIT_LIST_HEAD(&vm->invalidated); + INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->cleared); INIT_LIST_HEAD(&vm->freed); @@ -2425,10 +2495,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) return r; - if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) + vm->pte_support_ats = false; + + if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); - else + + if (adev->asic_type == CHIP_RAVEN) { + vm->pte_support_ats = true; + init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE; + } + } else vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); DRM_DEBUG_DRIVER("VM update mode is %s\n", @@ -2448,7 +2525,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, AMDGPU_GEM_DOMAIN_VRAM, flags, - NULL, NULL, &vm->root.bo); + NULL, NULL, init_pde_value, &vm->root.bo); if (r) goto error_free_sched_entity; @@ -2457,6 +2534,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto error_free_root; vm->last_eviction_counter = atomic64_read(&adev->num_evictions); + + if (vm->use_cpu_for_update) { + r = amdgpu_bo_kmap(vm->root.bo, NULL); + if (r) + goto error_free_root; + } + amdgpu_bo_unreserve(vm->root.bo); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 936f158bc5ec651fae06acc5e9a62a0ce0763355..ba6691b58ee72f4e785bcf83a643065a3e8b89e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -50,9 +50,6 @@ struct amdgpu_bo_list_entry; /* PTBs (Page Table Blocks) need to be aligned to 32K */ #define AMDGPU_VM_PTB_ALIGN_SIZE 32768 -/* LOG2 number of continuous pages for the fragment field */ -#define AMDGPU_LOG2_PAGES_PER_FRAG 4 - #define AMDGPU_PTE_VALID (1ULL << 0) #define AMDGPU_PTE_SYSTEM (1ULL << 1) #define AMDGPU_PTE_SNOOPED (1ULL << 2) @@ -68,6 +65,9 @@ struct amdgpu_bo_list_entry; /* TILED for VEGA10, reserved for older ASICs */ #define AMDGPU_PTE_PRT (1ULL << 51) +/* PDE is handled as PTE for VEGA10 */ +#define AMDGPU_PDE_PTE (1ULL << 54) + /* VEGA10 only */ #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) @@ -94,6 +94,18 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) +/* base structure for tracking BO usage in a VM */ +struct amdgpu_vm_bo_base { + /* constant after initialization */ + struct amdgpu_vm *vm; + struct amdgpu_bo *bo; + + /* protected by bo being reserved */ + struct list_head bo_list; + + /* protected by spinlock */ + struct list_head vm_status; +}; struct amdgpu_vm_pt { struct amdgpu_bo *bo; @@ -112,7 +124,7 @@ struct amdgpu_vm { spinlock_t status_lock; /* BOs moved, but not yet updated in the PT */ - struct list_head invalidated; + struct list_head moved; /* BOs cleared in the PT because of a move */ struct list_head cleared; @@ -135,11 +147,12 @@ struct amdgpu_vm { u64 client_id; /* dedicated to vm */ struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; - /* each VM will map on CSA */ - struct amdgpu_bo_va *csa_bo_va; /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ bool use_cpu_for_update; + + /* Flag to indicate ATS support from PTE for GFX9 */ + bool pte_support_ats; }; struct amdgpu_vm_id { @@ -182,6 +195,7 @@ struct amdgpu_vm_manager { uint32_t num_level; uint64_t vm_size; uint32_t block_size; + uint32_t fragment_size; /* vram base address for page table entry */ u64 vram_base_offset; /* vm pte handling */ @@ -214,15 +228,13 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); -void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, - struct amdgpu_vm *vm); int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t saddr, uint64_t size); int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_sync *sync, struct dma_fence *fence, struct amdgpu_job *job); -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, unsigned vmid); void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev); @@ -231,8 +243,8 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); -int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_sync *sync); +int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_sync *sync); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); @@ -259,7 +271,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, uint64_t saddr, uint64_t size); void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); -void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); +void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, + uint32_t fragment_size_default); +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, + uint32_t fragment_size_default); int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a2c59a08b2bd69919b23989feddff71a88855617..26e90062797173d772b4c71c2f3229ebb1bfbbbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -28,6 +28,8 @@ struct amdgpu_vram_mgr { struct drm_mm mm; spinlock_t lock; + atomic64_t usage; + atomic64_t vis_usage; }; /** @@ -78,6 +80,27 @@ static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) return 0; } +/** + * amdgpu_vram_mgr_vis_size - Calculate visible node size + * + * @adev: amdgpu device structure + * @node: MM node structure + * + * Calculate how many bytes of the MM node are inside visible VRAM + */ +static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, + struct drm_mm_node *node) +{ + uint64_t start = node->start << PAGE_SHIFT; + uint64_t end = (node->size + node->start) << PAGE_SHIFT; + + if (start >= adev->mc.visible_vram_size) + return 0; + + return (end > adev->mc.visible_vram_size ? + adev->mc.visible_vram_size : end) - start; +} + /** * amdgpu_vram_mgr_new - allocate new ranges * @@ -93,11 +116,13 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, const struct ttm_place *place, struct ttm_mem_reg *mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr = man->priv; struct drm_mm *mm = &mgr->mm; struct drm_mm_node *nodes; enum drm_mm_insert_mode mode; unsigned long lpfn, num_nodes, pages_per_node, pages_left; + uint64_t usage = 0, vis_usage = 0; unsigned i; int r; @@ -142,6 +167,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (unlikely(r)) goto error; + usage += nodes[i].size << PAGE_SHIFT; + vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); + /* Calculate a virtual BO start address to easily check if * everything is CPU accessible. */ @@ -155,6 +183,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, } spin_unlock(&mgr->lock); + atomic64_add(usage, &mgr->usage); + atomic64_add(vis_usage, &mgr->vis_usage); + mem->mm_node = nodes; return 0; @@ -181,8 +212,10 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_vram_mgr *mgr = man->priv; struct drm_mm_node *nodes = mem->mm_node; + uint64_t usage = 0, vis_usage = 0; unsigned pages = mem->num_pages; if (!mem->mm_node) @@ -192,31 +225,67 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, while (pages) { pages -= nodes->size; drm_mm_remove_node(nodes); + usage += nodes->size << PAGE_SHIFT; + vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes); ++nodes; } spin_unlock(&mgr->lock); + atomic64_sub(usage, &mgr->usage); + atomic64_sub(vis_usage, &mgr->vis_usage); + kfree(mem->mm_node); mem->mm_node = NULL; } +/** + * amdgpu_vram_mgr_usage - how many bytes are used in this domain + * + * @man: TTM memory type manager + * + * Returns how many bytes are used in this domain. + */ +uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man) +{ + struct amdgpu_vram_mgr *mgr = man->priv; + + return atomic64_read(&mgr->usage); +} + +/** + * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part + * + * @man: TTM memory type manager + * + * Returns how many bytes are used in the visible part of VRAM + */ +uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man) +{ + struct amdgpu_vram_mgr *mgr = man->priv; + + return atomic64_read(&mgr->vis_usage); +} + /** * amdgpu_vram_mgr_debug - dump VRAM table * * @man: TTM memory type manager - * @prefix: text prefix + * @printer: DRM printer to use * * Dump the table content using printk. */ static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, - const char *prefix) + struct drm_printer *printer) { struct amdgpu_vram_mgr *mgr = man->priv; - struct drm_printer p = drm_debug_printer(prefix); spin_lock(&mgr->lock); - drm_mm_print(&mgr->mm, &p); + drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); + + drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", + man->size, amdgpu_vram_mgr_usage(man) >> 20, + amdgpu_vram_mgr_vis_usage(man) >> 20); } const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 37a499ab30eb02fc0506727b8004c71dc640db58..567c4a5cf90cc2ce927c151c2665d4d2ab5a8395 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1824,21 +1824,14 @@ static int cik_common_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_amdkfd_suspend(adev); - return cik_common_hw_fini(adev); } static int cik_common_resume(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = cik_common_hw_init(adev); - if (r) - return r; - - return amdgpu_amdkfd_resume(adev); + return cik_common_hw_init(adev); } static bool cik_common_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c216e16826c99df35e1ab22c9c14a97144fff9ad..f508f4d01e4a9000f633c85e290964098e8c1b86 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -341,6 +341,63 @@ static void cik_sdma_rlc_stop(struct amdgpu_device *adev) /* XXX todo */ } +/** + * cik_ctx_switch_enable - stop the async dma engines context switch + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs context switch. + * + * Halt or unhalt the async dma engines context switch (VI). + */ +static void cik_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl, phase_quantum = 0; + int i; + + if (amdgpu_sdma_phase_quantum) { + unsigned value = amdgpu_sdma_phase_quantum; + unsigned unit = 0; + + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) { + value = (value + 1) >> 1; + unit++; + } + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) { + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT); + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT); + WARN_ONCE(1, + "clamping sdma_phase_quantum to %uK clock cycles\n", + value << unit); + } + phase_quantum = + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]); + if (enable) { + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, 1); + if (amdgpu_sdma_phase_quantum) { + WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i], + phase_quantum); + WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i], + phase_quantum); + } + } else { + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, 0); + } + + WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl); + } +} + /** * cik_sdma_enable - stop the async dma engines * @@ -537,6 +594,8 @@ static int cik_sdma_start(struct amdgpu_device *adev) /* halt the engine before programing */ cik_sdma_enable(adev, false); + /* enable sdma ring preemption */ + cik_ctx_switch_enable(adev, true); /* start the gfx rings and rlc compute queues */ r = cik_sdma_gfx_resume(adev); @@ -984,6 +1043,7 @@ static int cik_sdma_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + cik_ctx_switch_enable(adev, false); cik_sdma_enable(adev, false); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 9f78c03a2e3152eb9244d4b733fbd194c6227299..4e519dc4291616912ed0622698cb8720fb89aa40 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -484,134 +484,6 @@ static bool dce_v10_0_is_display_hung(struct amdgpu_device *adev) return true; } -static void dce_v10_0_stop_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 crtc_enabled, tmp; - int i; - - save->vga_render_control = RREG32(mmVGA_RENDER_CONTROL); - save->vga_hdp_control = RREG32(mmVGA_HDP_CONTROL); - - /* disable VGA render */ - tmp = RREG32(mmVGA_RENDER_CONTROL); - tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); - WREG32(mmVGA_RENDER_CONTROL, tmp); - - /* blank the display controllers */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]), - CRTC_CONTROL, CRTC_MASTER_EN); - if (crtc_enabled) { -#if 0 - u32 frame_count; - int j; - - save->crtc_enabled[i] = true; - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) { - amdgpu_display_vblank_wait(adev, i); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1); - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - } - /* wait for the next frame */ - frame_count = amdgpu_display_vblank_get_counter(adev, i); - for (j = 0; j < adev->usec_timeout; j++) { - if (amdgpu_display_vblank_get_counter(adev, i) != frame_count) - break; - udelay(1); - } - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK) == 0) { - tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1); - WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK) == 0) { - tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 1); - WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp); - } -#else - /* XXX this is a hack to avoid strange behavior with EFI on certain systems */ - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0); - WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - save->crtc_enabled[i] = false; - /* ***** */ -#endif - } else { - save->crtc_enabled[i] = false; - } - } -} - -static void dce_v10_0_resume_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 tmp, frame_count; - int i, j; - - /* update crtc base addresses */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); - - if (save->crtc_enabled[i]) { - tmp = RREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE) != 0) { - tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE, 0); - WREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK)) { - tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0); - WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK)) { - tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 0); - WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp); - } - for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_SURFACE_UPDATE_PENDING) == 0) - break; - udelay(1); - } - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - /* wait for the next frame */ - frame_count = amdgpu_display_vblank_get_counter(adev, i); - for (j = 0; j < adev->usec_timeout; j++) { - if (amdgpu_display_vblank_get_counter(adev, i) != frame_count) - break; - udelay(1); - } - } - } - - WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start)); - WREG32(mmVGA_MEMORY_BASE_ADDRESS, lower_32_bits(adev->mc.vram_start)); - - /* Unlock vga access */ - WREG32(mmVGA_HDP_CONTROL, save->vga_hdp_control); - mdelay(1); - WREG32(mmVGA_RENDER_CONTROL, save->vga_render_control); -} - static void dce_v10_0_set_vga_render_state(struct amdgpu_device *adev, bool render) { @@ -1867,7 +1739,7 @@ static void dce_v10_0_afmt_setmode(struct drm_encoder *encoder, dce_v10_0_audio_write_sad_regs(encoder); dce_v10_0_audio_write_latency_fields(encoder, mode); - err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); if (err < 0) { DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); return; @@ -2267,6 +2139,7 @@ static void dce_v10_0_crtc_load_lut(struct drm_crtc *crtc) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; + u16 *r, *g, *b; int i; u32 tmp; @@ -2304,11 +2177,14 @@ static void dce_v10_0_crtc_load_lut(struct drm_crtc *crtc) WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007); WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0); + r = crtc->gamma_store; + g = r + crtc->gamma_size; + b = g + crtc->gamma_size; for (i = 0; i < 256; i++) { WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset, - (amdgpu_crtc->lut_r[i] << 20) | - (amdgpu_crtc->lut_g[i] << 10) | - (amdgpu_crtc->lut_b[i] << 0)); + ((*r++ & 0xffc0) << 14) | + ((*g++ & 0xffc0) << 4) | + (*b++ >> 6)); } tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset); @@ -2555,7 +2431,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, aobj = gem_to_amdgpu_bo(obj); ret = amdgpu_bo_reserve(aobj, false); if (ret != 0) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2563,7 +2439,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2597,7 +2473,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); } - drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); + drm_gem_object_put_unlocked(amdgpu_crtc->cursor_bo); } amdgpu_crtc->cursor_bo = obj; @@ -2624,15 +2500,6 @@ static int dce_v10_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, uint32_t size, struct drm_modeset_acquire_ctx *ctx) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int i; - - /* userspace palettes are always correct as is */ - for (i = 0; i < size; i++) { - amdgpu_crtc->lut_r[i] = red[i] >> 6; - amdgpu_crtc->lut_g[i] = green[i] >> 6; - amdgpu_crtc->lut_b[i] = blue[i] >> 6; - } dce_v10_0_crtc_load_lut(crtc); return 0; @@ -2844,14 +2711,12 @@ static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = { .mode_set_base_atomic = dce_v10_0_crtc_set_base_atomic, .prepare = dce_v10_0_crtc_prepare, .commit = dce_v10_0_crtc_commit, - .load_lut = dce_v10_0_crtc_load_lut, .disable = dce_v10_0_crtc_disable, }; static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; - int i; amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); @@ -2869,12 +2734,6 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) adev->ddev->mode_config.cursor_width = amdgpu_crtc->max_cursor_width; adev->ddev->mode_config.cursor_height = amdgpu_crtc->max_cursor_height; - for (i = 0; i < 256; i++) { - amdgpu_crtc->lut_r[i] = i << 2; - amdgpu_crtc->lut_g[i] = i << 2; - amdgpu_crtc->lut_b[i] = i << 2; - } - switch (amdgpu_crtc->crtc_id) { case 0: default: @@ -3025,6 +2884,8 @@ static int dce_v10_0_hw_init(void *handle) dce_v10_0_init_golden_registers(adev); + /* disable vga render */ + dce_v10_0_set_vga_render_state(adev, false); /* init dig PHYs, disp eng pll */ amdgpu_atombios_encoder_init_dig(adev); amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); @@ -3737,7 +3598,6 @@ static void dce_v10_0_encoder_add(struct amdgpu_device *adev, } static const struct amdgpu_display_funcs dce_v10_0_display_funcs = { - .set_vga_render_state = &dce_v10_0_set_vga_render_state, .bandwidth_update = &dce_v10_0_bandwidth_update, .vblank_get_counter = &dce_v10_0_vblank_get_counter, .vblank_wait = &dce_v10_0_vblank_wait, @@ -3750,8 +3610,6 @@ static const struct amdgpu_display_funcs dce_v10_0_display_funcs = { .page_flip_get_scanoutpos = &dce_v10_0_crtc_get_scanoutpos, .add_encoder = &dce_v10_0_encoder_add, .add_connector = &amdgpu_connector_add, - .stop_mc_access = &dce_v10_0_stop_mc_access, - .resume_mc_access = &dce_v10_0_resume_mc_access, }; static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 4bcf01dc567a183b462a4333727f2fa90c6e924d..11edc75edaa99603c40d6986caf3fa8a5b94ff25 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -499,79 +499,6 @@ static bool dce_v11_0_is_display_hung(struct amdgpu_device *adev) return true; } -static void dce_v11_0_stop_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 crtc_enabled, tmp; - int i; - - save->vga_render_control = RREG32(mmVGA_RENDER_CONTROL); - save->vga_hdp_control = RREG32(mmVGA_HDP_CONTROL); - - /* disable VGA render */ - tmp = RREG32(mmVGA_RENDER_CONTROL); - tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); - WREG32(mmVGA_RENDER_CONTROL, tmp); - - /* blank the display controllers */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]), - CRTC_CONTROL, CRTC_MASTER_EN); - if (crtc_enabled) { -#if 1 - save->crtc_enabled[i] = true; - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) { - /*it is correct only for RGB ; black is 0*/ - WREG32(mmCRTC_BLANK_DATA_COLOR + crtc_offsets[i], 0); - tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1); - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - } -#else - /* XXX this is a hack to avoid strange behavior with EFI on certain systems */ - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0); - WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - save->crtc_enabled[i] = false; - /* ***** */ -#endif - } else { - save->crtc_enabled[i] = false; - } - } -} - -static void dce_v11_0_resume_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 tmp; - int i; - - /* update crtc base addresses */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); - - if (save->crtc_enabled[i]) { - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0); - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - } - } - - WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start)); - WREG32(mmVGA_MEMORY_BASE_ADDRESS, lower_32_bits(adev->mc.vram_start)); - - /* Unlock vga access */ - WREG32(mmVGA_HDP_CONTROL, save->vga_hdp_control); - mdelay(1); - WREG32(mmVGA_RENDER_CONTROL, save->vga_render_control); -} - static void dce_v11_0_set_vga_render_state(struct amdgpu_device *adev, bool render) { @@ -1851,7 +1778,7 @@ static void dce_v11_0_afmt_setmode(struct drm_encoder *encoder, dce_v11_0_audio_write_sad_regs(encoder); dce_v11_0_audio_write_latency_fields(encoder, mode); - err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); if (err < 0) { DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); return; @@ -2251,6 +2178,7 @@ static void dce_v11_0_crtc_load_lut(struct drm_crtc *crtc) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; + u16 *r, *g, *b; int i; u32 tmp; @@ -2282,11 +2210,14 @@ static void dce_v11_0_crtc_load_lut(struct drm_crtc *crtc) WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007); WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0); + r = crtc->gamma_store; + g = r + crtc->gamma_size; + b = g + crtc->gamma_size; for (i = 0; i < 256; i++) { WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset, - (amdgpu_crtc->lut_r[i] << 20) | - (amdgpu_crtc->lut_g[i] << 10) | - (amdgpu_crtc->lut_b[i] << 0)); + ((*r++ & 0xffc0) << 14) | + ((*g++ & 0xffc0) << 4) | + (*b++ >> 6)); } tmp = RREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset); @@ -2575,7 +2506,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, aobj = gem_to_amdgpu_bo(obj); ret = amdgpu_bo_reserve(aobj, false); if (ret != 0) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2583,7 +2514,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2617,7 +2548,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); } - drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); + drm_gem_object_put_unlocked(amdgpu_crtc->cursor_bo); } amdgpu_crtc->cursor_bo = obj; @@ -2644,15 +2575,6 @@ static int dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, uint32_t size, struct drm_modeset_acquire_ctx *ctx) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int i; - - /* userspace palettes are always correct as is */ - for (i = 0; i < size; i++) { - amdgpu_crtc->lut_r[i] = red[i] >> 6; - amdgpu_crtc->lut_g[i] = green[i] >> 6; - amdgpu_crtc->lut_b[i] = blue[i] >> 6; - } dce_v11_0_crtc_load_lut(crtc); return 0; @@ -2892,14 +2814,12 @@ static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = { .mode_set_base_atomic = dce_v11_0_crtc_set_base_atomic, .prepare = dce_v11_0_crtc_prepare, .commit = dce_v11_0_crtc_commit, - .load_lut = dce_v11_0_crtc_load_lut, .disable = dce_v11_0_crtc_disable, }; static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; - int i; amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); @@ -2917,12 +2837,6 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) adev->ddev->mode_config.cursor_width = amdgpu_crtc->max_cursor_width; adev->ddev->mode_config.cursor_height = amdgpu_crtc->max_cursor_height; - for (i = 0; i < 256; i++) { - amdgpu_crtc->lut_r[i] = i << 2; - amdgpu_crtc->lut_g[i] = i << 2; - amdgpu_crtc->lut_b[i] = i << 2; - } - switch (amdgpu_crtc->crtc_id) { case 0: default: @@ -3086,6 +3000,8 @@ static int dce_v11_0_hw_init(void *handle) dce_v11_0_init_golden_registers(adev); + /* disable vga render */ + dce_v11_0_set_vga_render_state(adev, false); /* init dig PHYs, disp eng pll */ amdgpu_atombios_crtc_powergate_init(adev); amdgpu_atombios_encoder_init_dig(adev); @@ -3806,7 +3722,6 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev, } static const struct amdgpu_display_funcs dce_v11_0_display_funcs = { - .set_vga_render_state = &dce_v11_0_set_vga_render_state, .bandwidth_update = &dce_v11_0_bandwidth_update, .vblank_get_counter = &dce_v11_0_vblank_get_counter, .vblank_wait = &dce_v11_0_vblank_wait, @@ -3819,8 +3734,6 @@ static const struct amdgpu_display_funcs dce_v11_0_display_funcs = { .page_flip_get_scanoutpos = &dce_v11_0_crtc_get_scanoutpos, .add_encoder = &dce_v11_0_encoder_add, .add_connector = &amdgpu_connector_add, - .stop_mc_access = &dce_v11_0_stop_mc_access, - .resume_mc_access = &dce_v11_0_resume_mc_access, }; static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index fd134a4629d7a4ee5f3d8559049f1bfaddd8bb48..a51e35f824a1a3211a0a6ccc5029dec78cdccff5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -42,6 +42,7 @@ #include "dce/dce_6_0_d.h" #include "dce/dce_6_0_sh_mask.h" #include "gca/gfx_7_2_enum.h" +#include "dce_v6_0.h" #include "si_enums.h" static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev); @@ -392,117 +393,6 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev) return mmDC_GPIO_HPD_A; } -static u32 evergreen_get_vblank_counter(struct amdgpu_device* adev, int crtc) -{ - if (crtc >= adev->mode_info.num_crtc) - return 0; - else - return RREG32(mmCRTC_STATUS_FRAME_COUNT + crtc_offsets[crtc]); -} - -static void dce_v6_0_stop_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 crtc_enabled, tmp, frame_count; - int i, j; - - save->vga_render_control = RREG32(mmVGA_RENDER_CONTROL); - save->vga_hdp_control = RREG32(mmVGA_HDP_CONTROL); - - /* disable VGA render */ - WREG32(mmVGA_RENDER_CONTROL, 0); - - /* blank the display controllers */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - crtc_enabled = RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK; - if (crtc_enabled) { - save->crtc_enabled[i] = true; - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - - if (!(tmp & CRTC_BLANK_CONTROL__CRTC_BLANK_DATA_EN_MASK)) { - dce_v6_0_vblank_wait(adev, i); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp |= CRTC_BLANK_CONTROL__CRTC_BLANK_DATA_EN_MASK; - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - } - /* wait for the next frame */ - frame_count = evergreen_get_vblank_counter(adev, i); - for (j = 0; j < adev->usec_timeout; j++) { - if (evergreen_get_vblank_counter(adev, i) != frame_count) - break; - udelay(1); - } - - /* XXX this is a hack to avoid strange behavior with EFI on certain systems */ - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); - tmp &= ~CRTC_CONTROL__CRTC_MASTER_EN_MASK; - WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - save->crtc_enabled[i] = false; - /* ***** */ - } else { - save->crtc_enabled[i] = false; - } - } -} - -static void dce_v6_0_resume_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 tmp; - int i, j; - - /* update crtc base addresses */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); - } - - WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start)); - WREG32(mmVGA_MEMORY_BASE_ADDRESS, (u32)adev->mc.vram_start); - - /* unlock regs and wait for update */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (save->crtc_enabled[i]) { - tmp = RREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i]); - if ((tmp & 0x7) != 0) { - tmp &= ~0x7; - WREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if (tmp & GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK) { - tmp &= ~GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK; - WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]); - if (tmp & 1) { - tmp &= ~1; - WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp); - } - for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if ((tmp & GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK) == 0) - break; - udelay(1); - } - } - } - - /* Unlock vga access */ - WREG32(mmVGA_HDP_CONTROL, save->vga_hdp_control); - mdelay(1); - WREG32(mmVGA_RENDER_CONTROL, save->vga_render_control); - -} - static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev, bool render) { @@ -1597,7 +1487,7 @@ static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder, ssize_t err; u32 tmp; - err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); if (err < 0) { DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); return; @@ -2182,6 +2072,7 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; + u16 *r, *g, *b; int i; DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id); @@ -2211,11 +2102,14 @@ static void dce_v6_0_crtc_load_lut(struct drm_crtc *crtc) WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007); WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0); + r = crtc->gamma_store; + g = r + crtc->gamma_size; + b = g + crtc->gamma_size; for (i = 0; i < 256; i++) { WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset, - (amdgpu_crtc->lut_r[i] << 20) | - (amdgpu_crtc->lut_g[i] << 10) | - (amdgpu_crtc->lut_b[i] << 0)); + ((*r++ & 0xffc0) << 14) | + ((*g++ & 0xffc0) << 4) | + (*b++ >> 6)); } WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, @@ -2428,7 +2322,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, aobj = gem_to_amdgpu_bo(obj); ret = amdgpu_bo_reserve(aobj, false); if (ret != 0) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2436,7 +2330,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2470,7 +2364,7 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); } - drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); + drm_gem_object_put_unlocked(amdgpu_crtc->cursor_bo); } amdgpu_crtc->cursor_bo = obj; @@ -2496,15 +2390,6 @@ static int dce_v6_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, uint32_t size, struct drm_modeset_acquire_ctx *ctx) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int i; - - /* userspace palettes are always correct as is */ - for (i = 0; i < size; i++) { - amdgpu_crtc->lut_r[i] = red[i] >> 6; - amdgpu_crtc->lut_g[i] = green[i] >> 6; - amdgpu_crtc->lut_b[i] = blue[i] >> 6; - } dce_v6_0_crtc_load_lut(crtc); return 0; @@ -2712,14 +2597,12 @@ static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = { .mode_set_base_atomic = dce_v6_0_crtc_set_base_atomic, .prepare = dce_v6_0_crtc_prepare, .commit = dce_v6_0_crtc_commit, - .load_lut = dce_v6_0_crtc_load_lut, .disable = dce_v6_0_crtc_disable, }; static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; - int i; amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); @@ -2737,12 +2620,6 @@ static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index) adev->ddev->mode_config.cursor_width = amdgpu_crtc->max_cursor_width; adev->ddev->mode_config.cursor_height = amdgpu_crtc->max_cursor_height; - for (i = 0; i < 256; i++) { - amdgpu_crtc->lut_r[i] = i << 2; - amdgpu_crtc->lut_g[i] = i << 2; - amdgpu_crtc->lut_b[i] = i << 2; - } - amdgpu_crtc->crtc_offset = crtc_offsets[amdgpu_crtc->crtc_id]; amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; @@ -2873,6 +2750,8 @@ static int dce_v6_0_hw_init(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* disable vga render */ + dce_v6_0_set_vga_render_state(adev, false); /* init dig PHYs, disp eng pll */ amdgpu_atombios_encoder_init_dig(adev); amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); @@ -3525,7 +3404,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev, } static const struct amdgpu_display_funcs dce_v6_0_display_funcs = { - .set_vga_render_state = &dce_v6_0_set_vga_render_state, .bandwidth_update = &dce_v6_0_bandwidth_update, .vblank_get_counter = &dce_v6_0_vblank_get_counter, .vblank_wait = &dce_v6_0_vblank_wait, @@ -3538,8 +3416,6 @@ static const struct amdgpu_display_funcs dce_v6_0_display_funcs = { .page_flip_get_scanoutpos = &dce_v6_0_crtc_get_scanoutpos, .add_encoder = &dce_v6_0_encoder_add, .add_connector = &amdgpu_connector_add, - .stop_mc_access = &dce_v6_0_stop_mc_access, - .resume_mc_access = &dce_v6_0_resume_mc_access, }; static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index a9e8695546272638e0922ba20853c241b0d7ae3b..9cf14b8b2db9b594ccd36085e29632ca511f4137 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -419,81 +419,6 @@ static bool dce_v8_0_is_display_hung(struct amdgpu_device *adev) return true; } -static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 crtc_enabled, tmp; - int i; - - save->vga_render_control = RREG32(mmVGA_RENDER_CONTROL); - save->vga_hdp_control = RREG32(mmVGA_HDP_CONTROL); - - /* disable VGA render */ - tmp = RREG32(mmVGA_RENDER_CONTROL); - tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); - WREG32(mmVGA_RENDER_CONTROL, tmp); - - /* blank the display controllers */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]), - CRTC_CONTROL, CRTC_MASTER_EN); - if (crtc_enabled) { -#if 1 - save->crtc_enabled[i] = true; - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) { - /*it is correct only for RGB ; black is 0*/ - WREG32(mmCRTC_BLANK_DATA_COLOR + crtc_offsets[i], 0); - tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1); - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - } - mdelay(20); -#else - /* XXX this is a hack to avoid strange behavior with EFI on certain systems */ - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); - tmp = RREG32(mmCRTC_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_CONTROL, CRTC_MASTER_EN, 0); - WREG32(mmCRTC_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - save->crtc_enabled[i] = false; - /* ***** */ -#endif - } else { - save->crtc_enabled[i] = false; - } - } -} - -static void dce_v8_0_resume_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - u32 tmp; - int i; - - /* update crtc base addresses */ - for (i = 0; i < adev->mode_info.num_crtc; i++) { - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); - - if (save->crtc_enabled[i]) { - tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); - tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0); - WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - } - mdelay(20); - } - - WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start)); - WREG32(mmVGA_MEMORY_BASE_ADDRESS, lower_32_bits(adev->mc.vram_start)); - - /* Unlock vga access */ - WREG32(mmVGA_HDP_CONTROL, save->vga_hdp_control); - mdelay(1); - WREG32(mmVGA_RENDER_CONTROL, save->vga_render_control); -} - static void dce_v8_0_set_vga_render_state(struct amdgpu_device *adev, bool render) { @@ -1750,7 +1675,7 @@ static void dce_v8_0_afmt_setmode(struct drm_encoder *encoder, dce_v8_0_audio_write_sad_regs(encoder); dce_v8_0_audio_write_latency_fields(encoder, mode); - err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); if (err < 0) { DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); return; @@ -2124,6 +2049,7 @@ static void dce_v8_0_crtc_load_lut(struct drm_crtc *crtc) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; + u16 *r, *g, *b; int i; DRM_DEBUG_KMS("%d\n", amdgpu_crtc->crtc_id); @@ -2153,11 +2079,14 @@ static void dce_v8_0_crtc_load_lut(struct drm_crtc *crtc) WREG32(mmDC_LUT_WRITE_EN_MASK + amdgpu_crtc->crtc_offset, 0x00000007); WREG32(mmDC_LUT_RW_INDEX + amdgpu_crtc->crtc_offset, 0); + r = crtc->gamma_store; + g = r + crtc->gamma_size; + b = g + crtc->gamma_size; for (i = 0; i < 256; i++) { WREG32(mmDC_LUT_30_COLOR + amdgpu_crtc->crtc_offset, - (amdgpu_crtc->lut_r[i] << 20) | - (amdgpu_crtc->lut_g[i] << 10) | - (amdgpu_crtc->lut_b[i] << 0)); + ((*r++ & 0xffc0) << 14) | + ((*g++ & 0xffc0) << 4) | + (*b++ >> 6)); } WREG32(mmDEGAMMA_CONTROL + amdgpu_crtc->crtc_offset, @@ -2406,7 +2335,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, aobj = gem_to_amdgpu_bo(obj); ret = amdgpu_bo_reserve(aobj, false); if (ret != 0) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2414,7 +2343,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unreserve(aobj); if (ret) { DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } @@ -2448,7 +2377,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); } - drm_gem_object_unreference_unlocked(amdgpu_crtc->cursor_bo); + drm_gem_object_put_unlocked(amdgpu_crtc->cursor_bo); } amdgpu_crtc->cursor_bo = obj; @@ -2475,15 +2404,6 @@ static int dce_v8_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, uint32_t size, struct drm_modeset_acquire_ctx *ctx) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int i; - - /* userspace palettes are always correct as is */ - for (i = 0; i < size; i++) { - amdgpu_crtc->lut_r[i] = red[i] >> 6; - amdgpu_crtc->lut_g[i] = green[i] >> 6; - amdgpu_crtc->lut_b[i] = blue[i] >> 6; - } dce_v8_0_crtc_load_lut(crtc); return 0; @@ -2702,14 +2622,12 @@ static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = { .mode_set_base_atomic = dce_v8_0_crtc_set_base_atomic, .prepare = dce_v8_0_crtc_prepare, .commit = dce_v8_0_crtc_commit, - .load_lut = dce_v8_0_crtc_load_lut, .disable = dce_v8_0_crtc_disable, }; static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; - int i; amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); @@ -2727,12 +2645,6 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) adev->ddev->mode_config.cursor_width = amdgpu_crtc->max_cursor_width; adev->ddev->mode_config.cursor_height = amdgpu_crtc->max_cursor_height; - for (i = 0; i < 256; i++) { - amdgpu_crtc->lut_r[i] = i << 2; - amdgpu_crtc->lut_g[i] = i << 2; - amdgpu_crtc->lut_b[i] = i << 2; - } - amdgpu_crtc->crtc_offset = crtc_offsets[amdgpu_crtc->crtc_id]; amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; @@ -2870,6 +2782,8 @@ static int dce_v8_0_hw_init(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* disable vga render */ + dce_v8_0_set_vga_render_state(adev, false); /* init dig PHYs, disp eng pll */ amdgpu_atombios_encoder_init_dig(adev); amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); @@ -3574,7 +3488,6 @@ static void dce_v8_0_encoder_add(struct amdgpu_device *adev, } static const struct amdgpu_display_funcs dce_v8_0_display_funcs = { - .set_vga_render_state = &dce_v8_0_set_vga_render_state, .bandwidth_update = &dce_v8_0_bandwidth_update, .vblank_get_counter = &dce_v8_0_vblank_get_counter, .vblank_wait = &dce_v8_0_vblank_wait, @@ -3587,8 +3500,6 @@ static const struct amdgpu_display_funcs dce_v8_0_display_funcs = { .page_flip_get_scanoutpos = &dce_v8_0_crtc_get_scanoutpos, .add_encoder = &dce_v8_0_encoder_add, .add_connector = &amdgpu_connector_add, - .stop_mc_access = &dce_v8_0_stop_mc_access, - .resume_mc_access = &dce_v8_0_resume_mc_access, }; static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 90bb08309a533cd2bad92de91ac9b3cc3bc3db7f..b9ee9073cb0dc2df7a0f28790282aa5c68a969fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -95,62 +95,6 @@ static u32 dce_virtual_hpd_get_gpio_reg(struct amdgpu_device *adev) return 0; } -static void dce_virtual_stop_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - switch (adev->asic_type) { -#ifdef CONFIG_DRM_AMDGPU_SI - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - dce_v6_0_disable_dce(adev); - break; -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KAVERI: - case CHIP_KABINI: - case CHIP_MULLINS: - dce_v8_0_disable_dce(adev); - break; -#endif - case CHIP_FIJI: - case CHIP_TONGA: - dce_v10_0_disable_dce(adev); - break; - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - dce_v11_0_disable_dce(adev); - break; - case CHIP_TOPAZ: -#ifdef CONFIG_DRM_AMDGPU_SI - case CHIP_HAINAN: -#endif - /* no DCE */ - return; - default: - DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); - } - - return; -} -static void dce_virtual_resume_mc_access(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) -{ - return; -} - -static void dce_virtual_set_vga_render_state(struct amdgpu_device *adev, - bool render) -{ - return; -} - /** * dce_virtual_bandwidth_update - program display watermarks * @@ -168,16 +112,6 @@ static int dce_virtual_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, uint32_t size, struct drm_modeset_acquire_ctx *ctx) { - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int i; - - /* userspace palettes are always correct as is */ - for (i = 0; i < size; i++) { - amdgpu_crtc->lut_r[i] = red[i] >> 6; - amdgpu_crtc->lut_g[i] = green[i] >> 6; - amdgpu_crtc->lut_b[i] = blue[i] >> 6; - } - return 0; } @@ -289,11 +223,6 @@ static int dce_virtual_crtc_set_base(struct drm_crtc *crtc, int x, int y, return 0; } -static void dce_virtual_crtc_load_lut(struct drm_crtc *crtc) -{ - return; -} - static int dce_virtual_crtc_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, int x, int y, enum mode_set_atomic state) @@ -309,14 +238,12 @@ static const struct drm_crtc_helper_funcs dce_virtual_crtc_helper_funcs = { .mode_set_base_atomic = dce_virtual_crtc_set_base_atomic, .prepare = dce_virtual_crtc_prepare, .commit = dce_virtual_crtc_commit, - .load_lut = dce_virtual_crtc_load_lut, .disable = dce_virtual_crtc_disable, }; static int dce_virtual_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; - int i; amdgpu_crtc = kzalloc(sizeof(struct amdgpu_crtc) + (AMDGPUFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); @@ -329,12 +256,6 @@ static int dce_virtual_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->crtc_id = index; adev->mode_info.crtcs[index] = amdgpu_crtc; - for (i = 0; i < 256; i++) { - amdgpu_crtc->lut_r[i] = i << 2; - amdgpu_crtc->lut_g[i] = i << 2; - amdgpu_crtc->lut_b[i] = i << 2; - } - amdgpu_crtc->pll_id = ATOM_PPLL_INVALID; amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; @@ -522,6 +443,47 @@ static int dce_virtual_sw_fini(void *handle) static int dce_virtual_hw_init(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + dce_v6_0_disable_dce(adev); + break; +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: + dce_v8_0_disable_dce(adev); + break; +#endif + case CHIP_FIJI: + case CHIP_TONGA: + dce_v10_0_disable_dce(adev); + break; + case CHIP_CARRIZO: + case CHIP_STONEY: + case CHIP_POLARIS11: + case CHIP_POLARIS10: + dce_v11_0_disable_dce(adev); + break; + case CHIP_TOPAZ: +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_HAINAN: +#endif + /* no DCE */ + break; + case CHIP_VEGA10: + break; + default: + DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); + } return 0; } @@ -677,7 +639,6 @@ static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev, } static const struct amdgpu_display_funcs dce_virtual_display_funcs = { - .set_vga_render_state = &dce_virtual_set_vga_render_state, .bandwidth_update = &dce_virtual_bandwidth_update, .vblank_get_counter = &dce_virtual_vblank_get_counter, .vblank_wait = &dce_virtual_vblank_wait, @@ -690,8 +651,6 @@ static const struct amdgpu_display_funcs dce_virtual_display_funcs = { .page_flip_get_scanoutpos = &dce_virtual_crtc_get_scanoutpos, .add_encoder = NULL, .add_connector = NULL, - .stop_mc_access = &dce_virtual_stop_mc_access, - .resume_mc_access = &dce_virtual_resume_mc_access, }; static void dce_virtual_set_display_funcs(struct amdgpu_device *adev) @@ -809,7 +768,7 @@ static const struct amdgpu_irq_src_funcs dce_virtual_crtc_irq_funcs = { static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev) { - adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_LAST; + adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VBLANK6 + 1; adev->crtc_irq.funcs = &dce_virtual_crtc_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 5173ca1fd159d19971ace8cf87bafb8749a75bbf..d228f5a990449f0b2c28314c972db2664c6bc1cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1573,7 +1573,7 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) static void gfx_v6_0_scratch_init(struct amdgpu_device *adev) { - adev->gfx.scratch.num_reg = 7; + adev->gfx.scratch.num_reg = 8; adev->gfx.scratch.reg_base = mmSCRATCH_REG0; adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; } @@ -2217,40 +2217,9 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev) { - int r; - - if (adev->gfx.rlc.save_restore_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); - - amdgpu_bo_unref(&adev->gfx.rlc.save_restore_obj); - adev->gfx.rlc.save_restore_obj = NULL; - } - - if (adev->gfx.rlc.clear_state_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); - adev->gfx.rlc.clear_state_obj = NULL; - } - - if (adev->gfx.rlc.cp_table_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - - amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj); - adev->gfx.rlc.cp_table_obj = NULL; - } + amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); } static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) @@ -2273,43 +2242,23 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) if (src_ptr) { /* save restore block */ - if (adev->gfx.rlc.save_restore_obj == NULL) { - r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, NULL, - &adev->gfx.rlc.save_restore_obj); - - if (r) { - dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false); - if (unlikely(r != 0)) { - gfx_v6_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.save_restore_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.save_restore_gpu_addr); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.save_restore_obj, + &adev->gfx.rlc.save_restore_gpu_addr, + (void **)&adev->gfx.rlc.sr_ptr); if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); - dev_warn(adev->dev, "(%d) pin RLC sr bo failed\n", r); + dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", + r); gfx_v6_0_rlc_fini(adev); return r; } - r = amdgpu_bo_kmap(adev->gfx.rlc.save_restore_obj, (void **)&adev->gfx.rlc.sr_ptr); - if (r) { - dev_warn(adev->dev, "(%d) map RLC sr bo failed\n", r); - gfx_v6_0_rlc_fini(adev); - return r; - } /* write the sr buffer */ dst_ptr = adev->gfx.rlc.sr_ptr; for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) dst_ptr[i] = cpu_to_le32(src_ptr[i]); + amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); } @@ -2319,39 +2268,17 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) adev->gfx.rlc.clear_state_size = gfx_v6_0_get_csb_size(adev); dws = adev->gfx.rlc.clear_state_size + (256 / 4); - if (adev->gfx.rlc.clear_state_obj == NULL) { - r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - NULL, NULL, - &adev->gfx.rlc.clear_state_obj); - - if (r) { - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v6_0_rlc_fini(adev); - return r; - } - } - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) { - gfx_v6_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_gpu_addr); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r); + dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); gfx_v6_0_rlc_fini(adev); return r; } - r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r); - gfx_v6_0_rlc_fini(adev); - return r; - } /* set up the cs buffer */ dst_ptr = adev->gfx.rlc.cs_ptr; reg_list_mc_addr = adev->gfx.rlc.clear_state_gpu_addr + 256; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 37b45e4403d175229855c167218ceeeaa627180b..00868764a0dd2cfedbc0317a3f1b7e6216d88424 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1823,7 +1823,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) } /** - * gmc_v7_0_init_compute_vmid - gart enable + * gfx_v7_0_init_compute_vmid - gart enable * * @adev: amdgpu_device pointer * @@ -1833,7 +1833,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) #define DEFAULT_SH_MEM_BASES (0x6000) #define FIRST_COMPUTE_VMID (8) #define LAST_COMPUTE_VMID (16) -static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev) +static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) { int i; uint32_t sh_mem_config; @@ -1921,6 +1921,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) ELEMENT_SIZE, 1); sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, INDEX_STRIDE, 3); + WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { @@ -1934,12 +1935,11 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) WREG32(mmSH_MEM_APE1_BASE, 1); WREG32(mmSH_MEM_APE1_LIMIT, 0); WREG32(mmSH_MEM_BASES, sh_mem_base); - WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); } cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - gmc_v7_0_init_compute_vmid(adev); + gfx_v7_0_init_compute_vmid(adev); WREG32(mmSX_DEBUG_1, 0x20); @@ -2021,7 +2021,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) */ static void gfx_v7_0_scratch_init(struct amdgpu_device *adev) { - adev->gfx.scratch.num_reg = 7; + adev->gfx.scratch.num_reg = 8; adev->gfx.scratch.reg_base = mmSCRATCH_REG0; adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; } @@ -2774,39 +2774,18 @@ static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev) */ static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev) { - int i, r; + int i; for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - if (ring->mqd_obj) { - r = amdgpu_bo_reserve(ring->mqd_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); - - amdgpu_bo_unpin(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - - amdgpu_bo_unref(&ring->mqd_obj); - ring->mqd_obj = NULL; - } + amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL); } } static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) { - int r; - - if (adev->gfx.mec.hpd_eop_obj) { - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); - - amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); - adev->gfx.mec.hpd_eop_obj = NULL; - } + amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); } static int gfx_v7_0_mec_init(struct amdgpu_device *adev) @@ -2823,33 +2802,14 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) /* allocate space for ALL pipes (even the ones we don't own) */ mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * GFX7_MEC_HPD_SIZE * 2; - if (adev->gfx.mec.hpd_eop_obj == NULL) { - r = amdgpu_bo_create(adev, - mec_hpd_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &adev->gfx.mec.hpd_eop_obj); - if (r) { - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); - return r; - } - } - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); - if (unlikely(r != 0)) { - gfx_v7_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, - &adev->gfx.mec.hpd_eop_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); - gfx_v7_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); if (r) { - dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); + dev_warn(adev->dev, "(%d) create, pin or map of HDP EOP bo failed\n", r); gfx_v7_0_mec_fini(adev); return r; } @@ -3108,32 +3068,12 @@ static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id) struct cik_mqd *mqd; struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; - if (ring->mqd_obj == NULL) { - r = amdgpu_bo_create(adev, - sizeof(struct cik_mqd), - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &ring->mqd_obj); - if (r) { - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - goto out; - - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, - &mqd_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); - goto out_unreserve; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); + r = amdgpu_bo_create_reserved(adev, sizeof(struct cik_mqd), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &mqd_gpu_addr, (void **)&mqd); if (r) { - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); - goto out_unreserve; + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); + return r; } mutex_lock(&adev->srbm_mutex); @@ -3147,9 +3087,7 @@ static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id) mutex_unlock(&adev->srbm_mutex); amdgpu_bo_kunmap(ring->mqd_obj); -out_unreserve: amdgpu_bo_unreserve(ring->mqd_obj); -out: return 0; } @@ -3361,43 +3299,9 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, */ static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev) { - int r; - - /* save restore block */ - if (adev->gfx.rlc.save_restore_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC sr bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.save_restore_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); - - amdgpu_bo_unref(&adev->gfx.rlc.save_restore_obj); - adev->gfx.rlc.save_restore_obj = NULL; - } - - /* clear state block */ - if (adev->gfx.rlc.clear_state_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - - amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); - adev->gfx.rlc.clear_state_obj = NULL; - } - - /* clear state block */ - if (adev->gfx.rlc.cp_table_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - - amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj); - adev->gfx.rlc.cp_table_obj = NULL; - } + amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); } static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) @@ -3432,39 +3336,17 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) if (src_ptr) { /* save restore block */ - if (adev->gfx.rlc.save_restore_obj == NULL) { - r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, - &adev->gfx.rlc.save_restore_obj); - if (r) { - dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(adev->gfx.rlc.save_restore_obj, false); - if (unlikely(r != 0)) { - gfx_v7_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.save_restore_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.save_restore_gpu_addr); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.save_restore_obj, + &adev->gfx.rlc.save_restore_gpu_addr, + (void **)&adev->gfx.rlc.sr_ptr); if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); - dev_warn(adev->dev, "(%d) pin RLC sr bo failed\n", r); + dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r); gfx_v7_0_rlc_fini(adev); return r; } - r = amdgpu_bo_kmap(adev->gfx.rlc.save_restore_obj, (void **)&adev->gfx.rlc.sr_ptr); - if (r) { - dev_warn(adev->dev, "(%d) map RLC sr bo failed\n", r); - gfx_v7_0_rlc_fini(adev); - return r; - } /* write the sr buffer */ dst_ptr = adev->gfx.rlc.sr_ptr; for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) @@ -3477,39 +3359,17 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) /* clear state block */ adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev); - if (adev->gfx.rlc.clear_state_obj == NULL) { - r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, - &adev->gfx.rlc.clear_state_obj); - if (r) { - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v7_0_rlc_fini(adev); - return r; - } - } - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) { - gfx_v7_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_gpu_addr); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r); + dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); gfx_v7_0_rlc_fini(adev); return r; } - r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r); - gfx_v7_0_rlc_fini(adev); - return r; - } /* set up the cs buffer */ dst_ptr = adev->gfx.rlc.cs_ptr; gfx_v7_0_get_csb_buffer(adev, dst_ptr); @@ -3518,37 +3378,14 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) } if (adev->gfx.rlc.cp_table_size) { - if (adev->gfx.rlc.cp_table_obj == NULL) { - r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, - &adev->gfx.rlc.cp_table_obj); - if (r) { - dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); - gfx_v7_0_rlc_fini(adev); - return r; - } - } - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); - if (unlikely(r != 0)) { - dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); - gfx_v7_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.cp_table_gpu_addr); - if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r); - gfx_v7_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr); + r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); if (r) { - dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r); + dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); gfx_v7_0_rlc_fini(adev); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index aa5a50f5eac817cd2a51a157a3af6a44fa51f587..832e592fcd0725b0f32406aec57d2fc466fe1179 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -193,8 +193,8 @@ static const u32 tonga_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF }; static const u32 tonga_mgcg_cgcg_init[] = @@ -303,8 +303,8 @@ static const u32 polaris11_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, }; static const u32 golden_settings_polaris10_a11[] = @@ -336,8 +336,8 @@ static const u32 polaris10_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, }; static const u32 fiji_golden_common_all[] = @@ -348,8 +348,8 @@ static const u32 fiji_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009, }; @@ -436,8 +436,8 @@ static const u32 iceland_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF }; static const u32 iceland_mgcg_cgcg_init[] = @@ -532,8 +532,8 @@ static const u32 cz_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF }; static const u32 cz_mgcg_cgcg_init[] = @@ -637,8 +637,8 @@ static const u32 stoney_golden_common_all[] = mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001, mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, - mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, - mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, }; static const u32 stoney_mgcg_cgcg_init[] = @@ -750,7 +750,7 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) static void gfx_v8_0_scratch_init(struct amdgpu_device *adev) { - adev->gfx.scratch.num_reg = 7; + adev->gfx.scratch.num_reg = 8; adev->gfx.scratch.reg_base = mmSCRATCH_REG0; adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; } @@ -1238,29 +1238,8 @@ static void cz_init_cp_jump_table(struct amdgpu_device *adev) static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) { - int r; - - /* clear state block */ - if (adev->gfx.rlc.clear_state_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); - adev->gfx.rlc.clear_state_obj = NULL; - } - - /* jump table block */ - if (adev->gfx.rlc.cp_table_obj) { - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj); - adev->gfx.rlc.cp_table_obj = NULL; - } + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); } static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) @@ -1278,39 +1257,17 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) /* clear state block */ adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); - if (adev->gfx.rlc.clear_state_obj == NULL) { - r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, - &adev->gfx.rlc.clear_state_obj); - if (r) { - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v8_0_rlc_fini(adev); - return r; - } - } - r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); - if (unlikely(r != 0)) { - gfx_v8_0_rlc_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_gpu_addr); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); - dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r); + dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); gfx_v8_0_rlc_fini(adev); return r; } - r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r); - gfx_v8_0_rlc_fini(adev); - return r; - } /* set up the cs buffer */ dst_ptr = adev->gfx.rlc.cs_ptr; gfx_v8_0_get_csb_buffer(adev, dst_ptr); @@ -1321,34 +1278,13 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) if ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY)) { adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ - if (adev->gfx.rlc.cp_table_obj == NULL) { - r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, - &adev->gfx.rlc.cp_table_obj); - if (r) { - dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); - if (unlikely(r != 0)) { - dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); - return r; - } - r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.cp_table_gpu_addr); - if (r) { - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r); - return r; - } - r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr); + r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); if (r) { - dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r); + dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); return r; } @@ -1363,17 +1299,7 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { - int r; - - if (adev->gfx.mec.hpd_eop_obj) { - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); - adev->gfx.mec.hpd_eop_obj = NULL; - } + amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); } static int gfx_v8_0_mec_init(struct amdgpu_device *adev) @@ -1389,34 +1315,13 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; - if (adev->gfx.mec.hpd_eop_obj == NULL) { - r = amdgpu_bo_create(adev, - mec_hpd_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &adev->gfx.mec.hpd_eop_obj); - if (r) { - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); - if (unlikely(r != 0)) { - gfx_v8_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, - &adev->gfx.mec.hpd_eop_gpu_addr); + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); if (r) { - dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); - gfx_v8_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); - if (r) { - dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); - gfx_v8_0_mec_fini(adev); + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); return r; } @@ -3802,6 +3707,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) ELEMENT_SIZE, 1); sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, INDEX_STRIDE, 3); + WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); + mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { vi_srbm_select(adev, 0, 0, 0, i); @@ -3825,7 +3732,6 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) WREG32(mmSH_MEM_APE1_BASE, 1); WREG32(mmSH_MEM_APE1_LIMIT, 0); - WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); } vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -4564,7 +4470,7 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) /* This situation may be hit in the future if a new HW * generation exposes more than 64 queues. If so, the * definition of queue_mask needs updating */ - if (WARN_ON(i > (sizeof(queue_mask)*8))) { + if (WARN_ON(i >= (sizeof(queue_mask)*8))) { DRM_ERROR("Invalid KCQ enabled: %d\n", i); break; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c9b9c88231aa5070b709e97ce558a78fcfb43097..69182eeca264e723d2ae8a7bce6567258600732d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -116,7 +116,9 @@ static const u32 golden_settings_gc_9_0[] = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), 0x00001000, 0x00001000, SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x01000107, + SOC15_REG_OFFSET(GC, 0, mmSQC_CONFIG), 0x03000000, 0x020a2000, SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68, SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197, @@ -211,7 +213,7 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) { - adev->gfx.scratch.num_reg = 7; + adev->gfx.scratch.num_reg = 8; adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; } @@ -772,18 +774,16 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) if (cs_data) { /* clear state block */ adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev); - if (adev->gfx.rlc.clear_state_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_obj, - &adev->gfx.rlc.clear_state_gpu_addr, - (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_err(adev->dev, - "(%d) failed to create rlc csb bo\n", r); - gfx_v9_0_rlc_fini(adev); - return r; - } + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", + r); + gfx_v9_0_rlc_fini(adev); + return r; } /* set up the cs buffer */ dst_ptr = adev->gfx.rlc.cs_ptr; @@ -795,18 +795,16 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) if (adev->asic_type == CHIP_RAVEN) { /* TODO: double check the cp_table_size for RV */ adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ - if (adev->gfx.rlc.cp_table_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, adev->gfx.rlc.cp_table_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.cp_table_obj, - &adev->gfx.rlc.cp_table_gpu_addr, - (void **)&adev->gfx.rlc.cp_table_ptr); - if (r) { - dev_err(adev->dev, - "(%d) failed to create cp table bo\n", r); - gfx_v9_0_rlc_fini(adev); - return r; - } + r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); + if (r) { + dev_err(adev->dev, + "(%d) failed to create cp table bo\n", r); + gfx_v9_0_rlc_fini(adev); + return r; } rv_init_cp_jump_table(adev); @@ -821,28 +819,8 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) { - int r; - - if (adev->gfx.mec.hpd_eop_obj) { - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); - amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); - - amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); - adev->gfx.mec.hpd_eop_obj = NULL; - } - if (adev->gfx.mec.mec_fw_obj) { - r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r); - amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj); - amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); - - amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj); - adev->gfx.mec.mec_fw_obj = NULL; - } + amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); } static int gfx_v9_0_mec_init(struct amdgpu_device *adev) @@ -862,33 +840,13 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) amdgpu_gfx_compute_queue_acquire(adev); mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; - if (adev->gfx.mec.hpd_eop_obj == NULL) { - r = amdgpu_bo_create(adev, - mec_hpd_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &adev->gfx.mec.hpd_eop_obj); - if (r) { - dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); - if (unlikely(r != 0)) { - gfx_v9_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, - &adev->gfx.mec.hpd_eop_gpu_addr); + r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_obj, + &adev->gfx.mec.hpd_eop_gpu_addr, + (void **)&hpd); if (r) { - dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); - gfx_v9_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); - if (r) { - dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); gfx_v9_0_mec_fini(adev); return r; } @@ -905,42 +863,22 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; - if (adev->gfx.mec.mec_fw_obj == NULL) { - r = amdgpu_bo_create(adev, - mec_hdr->header.ucode_size_bytes, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &adev->gfx.mec.mec_fw_obj); - if (r) { - dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false); - if (unlikely(r != 0)) { - gfx_v9_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_pin(adev->gfx.mec.mec_fw_obj, AMDGPU_GEM_DOMAIN_GTT, - &adev->gfx.mec.mec_fw_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin mec firmware bo failed\n", r); - gfx_v9_0_mec_fini(adev); - return r; - } - r = amdgpu_bo_kmap(adev->gfx.mec.mec_fw_obj, (void **)&fw); + r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.mec_fw_obj, + &adev->gfx.mec.mec_fw_gpu_addr, + (void **)&fw); if (r) { - dev_warn(adev->dev, "(%d) map firmware bo failed\n", r); + dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); gfx_v9_0_mec_fini(adev); return r; } + memcpy(fw, fw_data, fw_size); amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); - return 0; } @@ -2219,7 +2157,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; const struct cs_section_def *sect = NULL; const struct cs_extent_def *ext = NULL; - int r, i; + int r, i, tmp; /* init the CP */ WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); @@ -2227,7 +2165,7 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) gfx_v9_0_cp_gfx_enable(adev, true); - r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4); + r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); return r; @@ -2265,6 +2203,12 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_write(ring, 0x8000); amdgpu_ring_write(ring, 0x8000); + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); + tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | + (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0); + amdgpu_ring_commit(ring); return 0; @@ -2427,7 +2371,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) /* This situation may be hit in the future if a new HW * generation exposes more than 64 queues. If so, the * definition of queue_mask needs updating */ - if (WARN_ON(i > (sizeof(queue_mask)*8))) { + if (WARN_ON(i >= (sizeof(queue_mask)*8))) { DRM_ERROR("Invalid KCQ enabled: %d\n", i); break; } @@ -4158,7 +4102,7 @@ static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev, return 0; } -const struct amd_ip_funcs gfx_v9_0_ip_funcs = { +static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, .late_init = gfx_v9_0_late_init, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h index 56ef652a575dd3d36d60c48c13a049c620e350ee..fa5a3fbaf6aba2f873bc6014c8977299ab99561c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h @@ -24,7 +24,6 @@ #ifndef __GFX_V9_0_H__ #define __GFX_V9_0_H__ -extern const struct amd_ip_funcs gfx_v9_0_ip_funcs; extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block; void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index a42f483767e75510d02d834fdc724c3b83baa6ec..4f2788b61a08bd4db43d52d14ec9ab6e771b0e37 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -58,14 +58,14 @@ static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) gfxhub_v1_0_init_gart_pt_regs(adev); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, - (u32)(adev->mc.gtt_start >> 12)); + (u32)(adev->mc.gart_start >> 12)); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, - (u32)(adev->mc.gtt_start >> 44)); + (u32)(adev->mc.gart_start >> 44)); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, - (u32)(adev->mc.gtt_end >> 12)); + (u32)(adev->mc.gart_end >> 12)); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, - (u32)(adev->mc.gtt_end >> 44)); + (u32)(adev->mc.gart_end >> 44)); } static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) @@ -124,12 +124,12 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) { - uint32_t tmp; + uint32_t tmp, field; /* Setup L2 cache */ tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1); /* XXX for emulation, Refer to closed source code.*/ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); @@ -143,7 +143,10 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); + field = adev->vm_manager.fragment_size; tmp = mmVM_L2_CNTL3_DEFAULT; + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); tmp = mmVM_L2_CNTL4_DEFAULT; @@ -206,6 +209,9 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h index d2dbb085f4802249fcaceaed5f8bab266c6ca6ed..206e29cad7533579f460f3964fc5c07326d50bf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h @@ -30,7 +30,5 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value); void gfxhub_v1_0_init(struct amdgpu_device *adev); u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); -extern const struct amd_ip_funcs gfxhub_v1_0_ip_funcs; -extern const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index d0214d942bfc48f2044d6ef26f8671ee5d3b6f39..12b0c4cd7a5af801b32dd29067cbba3ee88b2e08 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -66,14 +66,10 @@ static const u32 crtc_offsets[6] = SI_CRTC5_REGISTER_OFFSET }; -static void gmc_v6_0_mc_stop(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) +static void gmc_v6_0_mc_stop(struct amdgpu_device *adev) { u32 blackout; - if (adev->mode_info.num_crtc) - amdgpu_display_stop_mc_access(adev, save); - gmc_v6_0_wait_for_idle((void *)adev); blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL); @@ -90,8 +86,7 @@ static void gmc_v6_0_mc_stop(struct amdgpu_device *adev, } -static void gmc_v6_0_mc_resume(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) +static void gmc_v6_0_mc_resume(struct amdgpu_device *adev) { u32 tmp; @@ -103,10 +98,6 @@ static void gmc_v6_0_mc_resume(struct amdgpu_device *adev, tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1); tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1); WREG32(mmBIF_FB_EN, tmp); - - if (adev->mode_info.num_crtc) - amdgpu_display_resume_mc_access(adev, save); - } static int gmc_v6_0_init_microcode(struct amdgpu_device *adev) @@ -228,20 +219,20 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev) static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) { + u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; + base <<= 24; + if (mc->mc_vram_size > 0xFFC0000000ULL) { dev_warn(adev->dev, "limiting VRAM\n"); mc->real_vram_size = 0xFFC0000000ULL; mc->mc_vram_size = 0xFFC0000000ULL; } - amdgpu_vram_location(adev, &adev->mc, 0); - adev->mc.gtt_base_align = 0; - amdgpu_gtt_location(adev, mc); + amdgpu_vram_location(adev, &adev->mc, base); + amdgpu_gart_location(adev, mc); } static void gmc_v6_0_mc_program(struct amdgpu_device *adev) { - struct amdgpu_mode_mc_save save; - u32 tmp; int i, j; /* Initialize HDP */ @@ -254,16 +245,23 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); - if (adev->mode_info.num_crtc) - amdgpu_display_set_vga_render_state(adev, false); - - gmc_v6_0_mc_stop(adev, &save); - if (gmc_v6_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } - WREG32(mmVGA_HDP_CONTROL, VGA_HDP_CONTROL__VGA_MEMORY_DISABLE_MASK); + if (adev->mode_info.num_crtc) { + u32 tmp; + + /* Lockout access through VGA aperture*/ + tmp = RREG32(mmVGA_HDP_CONTROL); + tmp |= VGA_HDP_CONTROL__VGA_MEMORY_DISABLE_MASK; + WREG32(mmVGA_HDP_CONTROL, tmp); + + /* disable VGA render */ + tmp = RREG32(mmVGA_RENDER_CONTROL); + tmp &= ~VGA_VSTATUS_CNTL; + WREG32(mmVGA_RENDER_CONTROL, tmp); + } /* Update configuration */ WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, adev->mc.vram_start >> 12); @@ -271,13 +269,6 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) adev->mc.vram_end >> 12); WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, adev->vram_scratch.gpu_addr >> 12); - tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16; - tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF); - WREG32(mmMC_VM_FB_LOCATION, tmp); - /* XXX double check these! */ - WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8)); - WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); - WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF); WREG32(mmMC_VM_AGP_BASE, 0); WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF); WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF); @@ -285,7 +276,6 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) if (gmc_v6_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } - gmc_v6_0_mc_resume(adev, &save); } static int gmc_v6_0_mc_init(struct amdgpu_device *adev) @@ -342,15 +332,7 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; adev->mc.visible_vram_size = adev->mc.aper_size; - /* unless the user had overridden it, set the gart - * size equal to the 1024 or vram, whichever is larger. - */ - if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->mc.mc_vram_size); - else - adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; - + amdgpu_gart_set_defaults(adev); gmc_v6_0_vram_gtt_location(adev, &adev->mc); return 0; @@ -479,6 +461,7 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) { int r, i; + u32 field; if (adev->gart.robj == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); @@ -506,13 +489,15 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) WREG32(mmVM_L2_CNTL2, VM_L2_CNTL2__INVALIDATE_ALL_L1_TLBS_MASK | VM_L2_CNTL2__INVALIDATE_L2_CACHE_MASK); + + field = adev->vm_manager.fragment_size; WREG32(mmVM_L2_CNTL3, VM_L2_CNTL3__L2_CACHE_BIGK_ASSOCIATIVITY_MASK | - (4UL << VM_L2_CNTL3__BANK_SELECT__SHIFT) | - (4UL << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); + (field << VM_L2_CNTL3__BANK_SELECT__SHIFT) | + (field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT)); /* setup context0 */ - WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); @@ -559,7 +544,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) gmc_v6_0_gart_flush_gpu_tlb(adev, 0); dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gtt_size >> 20), + (unsigned)(adev->mc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -829,7 +814,7 @@ static int gmc_v6_0_sw_init(void *handle) if (r) return r; - amdgpu_vm_adjust_size(adev, 64); + amdgpu_vm_adjust_size(adev, 64, 4); adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; adev->mc.mc_mask = 0xffffffffffULL; @@ -987,7 +972,6 @@ static int gmc_v6_0_wait_for_idle(void *handle) static int gmc_v6_0_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_mode_mc_save save; u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS); @@ -1003,7 +987,7 @@ static int gmc_v6_0_soft_reset(void *handle) } if (srbm_soft_reset) { - gmc_v6_0_mc_stop(adev, &save); + gmc_v6_0_mc_stop(adev); if (gmc_v6_0_wait_for_idle(adev)) { dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); } @@ -1023,7 +1007,7 @@ static int gmc_v6_0_soft_reset(void *handle) udelay(50); - gmc_v6_0_mc_resume(adev, &save); + gmc_v6_0_mc_resume(adev); udelay(50); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 7e9ea53edf8bfe65ef5667a4a6a8002c9e0ef0cf..e42c1ad3af5e06c33a59939bc9abab48b2d9c406 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -37,6 +37,9 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" +#include "dce/dce_8_0_d.h" +#include "dce/dce_8_0_sh_mask.h" + #include "amdgpu_atombios.h" static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev); @@ -76,14 +79,10 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev) } } -static void gmc_v7_0_mc_stop(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) +static void gmc_v7_0_mc_stop(struct amdgpu_device *adev) { u32 blackout; - if (adev->mode_info.num_crtc) - amdgpu_display_stop_mc_access(adev, save); - gmc_v7_0_wait_for_idle((void *)adev); blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL); @@ -99,8 +98,7 @@ static void gmc_v7_0_mc_stop(struct amdgpu_device *adev, udelay(100); } -static void gmc_v7_0_mc_resume(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) +static void gmc_v7_0_mc_resume(struct amdgpu_device *adev) { u32 tmp; @@ -112,9 +110,6 @@ static void gmc_v7_0_mc_resume(struct amdgpu_device *adev, tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1); tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1); WREG32(mmBIF_FB_EN, tmp); - - if (adev->mode_info.num_crtc) - amdgpu_display_resume_mc_access(adev, save); } /** @@ -242,15 +237,17 @@ static int gmc_v7_0_mc_load_microcode(struct amdgpu_device *adev) static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) { + u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; + base <<= 24; + if (mc->mc_vram_size > 0xFFC0000000ULL) { /* leave room for at least 1024M GTT */ dev_warn(adev->dev, "limiting VRAM\n"); mc->real_vram_size = 0xFFC0000000ULL; mc->mc_vram_size = 0xFFC0000000ULL; } - amdgpu_vram_location(adev, &adev->mc, 0); - adev->mc.gtt_base_align = 0; - amdgpu_gtt_location(adev, mc); + amdgpu_vram_location(adev, &adev->mc, base); + amdgpu_gart_location(adev, mc); } /** @@ -263,7 +260,6 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, */ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) { - struct amdgpu_mode_mc_save save; u32 tmp; int i, j; @@ -277,13 +273,20 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); - if (adev->mode_info.num_crtc) - amdgpu_display_set_vga_render_state(adev, false); - - gmc_v7_0_mc_stop(adev, &save); if (gmc_v7_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } + if (adev->mode_info.num_crtc) { + /* Lockout access through VGA aperture*/ + tmp = RREG32(mmVGA_HDP_CONTROL); + tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); + WREG32(mmVGA_HDP_CONTROL, tmp); + + /* disable VGA render */ + tmp = RREG32(mmVGA_RENDER_CONTROL); + tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + WREG32(mmVGA_RENDER_CONTROL, tmp); + } /* Update configuration */ WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, adev->mc.vram_start >> 12); @@ -291,20 +294,12 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) adev->mc.vram_end >> 12); WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, adev->vram_scratch.gpu_addr >> 12); - tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16; - tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF); - WREG32(mmMC_VM_FB_LOCATION, tmp); - /* XXX double check these! */ - WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8)); - WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); - WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF); WREG32(mmMC_VM_AGP_BASE, 0); WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF); WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF); if (gmc_v7_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } - gmc_v7_0_mc_resume(adev, &save); WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); @@ -391,15 +386,7 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) if (adev->mc.visible_vram_size > adev->mc.real_vram_size) adev->mc.visible_vram_size = adev->mc.real_vram_size; - /* unless the user had overridden it, set the gart - * size equal to the 1024 or vram, whichever is larger. - */ - if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->mc.mc_vram_size); - else - adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; - + amdgpu_gart_set_defaults(adev); gmc_v7_0_vram_gtt_location(adev, &adev->mc); return 0; @@ -575,7 +562,7 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable) static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) { int r, i; - u32 tmp; + u32 tmp, field; if (adev->gart.robj == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); @@ -605,14 +592,16 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32(mmVM_L2_CNTL2, tmp); + + field = adev->vm_manager.fragment_size; tmp = RREG32(mmVM_L2_CNTL3); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field); WREG32(mmVM_L2_CNTL3, tmp); /* setup context0 */ - WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); @@ -666,7 +655,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) gmc_v7_0_gart_flush_gpu_tlb(adev, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gtt_size >> 20), + (unsigned)(adev->mc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -961,7 +950,7 @@ static int gmc_v7_0_sw_init(void *handle) * Currently set to 4GB ((1 << 20) 4k pages). * Max GPUVM size for cayman and SI is 40 bits. */ - amdgpu_vm_adjust_size(adev, 64); + amdgpu_vm_adjust_size(adev, 64, 4); adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; /* Set the internal MC address mask @@ -1138,7 +1127,6 @@ static int gmc_v7_0_wait_for_idle(void *handle) static int gmc_v7_0_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_mode_mc_save save; u32 srbm_soft_reset = 0; u32 tmp = RREG32(mmSRBM_STATUS); @@ -1154,7 +1142,7 @@ static int gmc_v7_0_soft_reset(void *handle) } if (srbm_soft_reset) { - gmc_v7_0_mc_stop(adev, &save); + gmc_v7_0_mc_stop(adev); if (gmc_v7_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); } @@ -1175,7 +1163,7 @@ static int gmc_v7_0_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - gmc_v7_0_mc_resume(adev, &save); + gmc_v7_0_mc_resume(adev); udelay(50); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index cc9f88057cd5e745cfdcc36d16e716fcc2c168ac..7ca2dae8237a0952da81414d5e14b53fa06e566e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -35,6 +35,9 @@ #include "oss/oss_3_0_d.h" #include "oss/oss_3_0_sh_mask.h" +#include "dce/dce_10_0_d.h" +#include "dce/dce_10_0_sh_mask.h" + #include "vid.h" #include "vi.h" @@ -161,14 +164,10 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) } } -static void gmc_v8_0_mc_stop(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) +static void gmc_v8_0_mc_stop(struct amdgpu_device *adev) { u32 blackout; - if (adev->mode_info.num_crtc) - amdgpu_display_stop_mc_access(adev, save); - gmc_v8_0_wait_for_idle(adev); blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL); @@ -184,8 +183,7 @@ static void gmc_v8_0_mc_stop(struct amdgpu_device *adev, udelay(100); } -static void gmc_v8_0_mc_resume(struct amdgpu_device *adev, - struct amdgpu_mode_mc_save *save) +static void gmc_v8_0_mc_resume(struct amdgpu_device *adev) { u32 tmp; @@ -197,9 +195,6 @@ static void gmc_v8_0_mc_resume(struct amdgpu_device *adev, tmp = REG_SET_FIELD(0, BIF_FB_EN, FB_READ_EN, 1); tmp = REG_SET_FIELD(tmp, BIF_FB_EN, FB_WRITE_EN, 1); WREG32(mmBIF_FB_EN, tmp); - - if (adev->mode_info.num_crtc) - amdgpu_display_resume_mc_access(adev, save); } /** @@ -404,15 +399,20 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev) static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) { + u64 base = 0; + + if (!amdgpu_sriov_vf(adev)) + base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; + base <<= 24; + if (mc->mc_vram_size > 0xFFC0000000ULL) { /* leave room for at least 1024M GTT */ dev_warn(adev->dev, "limiting VRAM\n"); mc->real_vram_size = 0xFFC0000000ULL; mc->mc_vram_size = 0xFFC0000000ULL; } - amdgpu_vram_location(adev, &adev->mc, 0); - adev->mc.gtt_base_align = 0; - amdgpu_gtt_location(adev, mc); + amdgpu_vram_location(adev, &adev->mc, base); + amdgpu_gart_location(adev, mc); } /** @@ -425,7 +425,6 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, */ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) { - struct amdgpu_mode_mc_save save; u32 tmp; int i, j; @@ -439,13 +438,20 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); - if (adev->mode_info.num_crtc) - amdgpu_display_set_vga_render_state(adev, false); - - gmc_v8_0_mc_stop(adev, &save); if (gmc_v8_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } + if (adev->mode_info.num_crtc) { + /* Lockout access through VGA aperture*/ + tmp = RREG32(mmVGA_HDP_CONTROL); + tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); + WREG32(mmVGA_HDP_CONTROL, tmp); + + /* disable VGA render */ + tmp = RREG32(mmVGA_RENDER_CONTROL); + tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + WREG32(mmVGA_RENDER_CONTROL, tmp); + } /* Update configuration */ WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, adev->mc.vram_start >> 12); @@ -453,20 +459,23 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) adev->mc.vram_end >> 12); WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, adev->vram_scratch.gpu_addr >> 12); - tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16; - tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF); - WREG32(mmMC_VM_FB_LOCATION, tmp); - /* XXX double check these! */ - WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8)); - WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); - WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF); + + if (amdgpu_sriov_vf(adev)) { + tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16; + tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF); + WREG32(mmMC_VM_FB_LOCATION, tmp); + /* XXX double check these! */ + WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8)); + WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30)); + WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF); + } + WREG32(mmMC_VM_AGP_BASE, 0); WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF); WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF); if (gmc_v8_0_wait_for_idle((void *)adev)) { dev_warn(adev->dev, "Wait for MC idle timedout !\n"); } - gmc_v8_0_mc_resume(adev, &save); WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); @@ -553,15 +562,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) if (adev->mc.visible_vram_size > adev->mc.real_vram_size) adev->mc.visible_vram_size = adev->mc.real_vram_size; - /* unless the user had overridden it, set the gart - * size equal to the 1024 or vram, whichever is larger. - */ - if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->mc.mc_vram_size); - else - adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; - + amdgpu_gart_set_defaults(adev); gmc_v8_0_vram_gtt_location(adev, &adev->mc); return 0; @@ -761,7 +762,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) { int r, i; - u32 tmp; + u32 tmp, field; if (adev->gart.robj == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); @@ -792,10 +793,12 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32(mmVM_L2_CNTL2, tmp); + + field = adev->vm_manager.fragment_size; tmp = RREG32(mmVM_L2_CNTL3); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 4); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 4); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field); WREG32(mmVM_L2_CNTL3, tmp); /* XXX: set to enable PTE/PDE in system memory */ tmp = RREG32(mmVM_L2_CNTL4); @@ -813,8 +816,8 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PTE_REQUEST_SNOOP, 0); WREG32(mmVM_L2_CNTL4, tmp); /* setup context0 */ - WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); @@ -869,7 +872,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) gmc_v8_0_gart_flush_gpu_tlb(adev, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gtt_size >> 20), + (unsigned)(adev->mc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -1045,7 +1048,7 @@ static int gmc_v8_0_sw_init(void *handle) * Currently set to 4GB ((1 << 20) 4k pages). * Max GPUVM size for cayman and SI is 40 bits. */ - amdgpu_vm_adjust_size(adev, 64); + amdgpu_vm_adjust_size(adev, 64, 4); adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; /* Set the internal MC address mask @@ -1260,7 +1263,7 @@ static int gmc_v8_0_pre_soft_reset(void *handle) if (!adev->mc.srbm_soft_reset) return 0; - gmc_v8_0_mc_stop(adev, &adev->mc.save); + gmc_v8_0_mc_stop(adev); if (gmc_v8_0_wait_for_idle(adev)) { dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); } @@ -1306,7 +1309,7 @@ static int gmc_v8_0_post_soft_reset(void *handle) if (!adev->mc.srbm_soft_reset) return 0; - gmc_v8_0_mc_resume(adev, &adev->mc.save); + gmc_v8_0_mc_resume(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 175ba5f9691c4a5188442d012430e4212bbaee20..2769c2b3b56e8f5ff8e8bbfa0f2cefe86569fe1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -23,11 +23,14 @@ #include #include "amdgpu.h" #include "gmc_v9_0.h" +#include "amdgpu_atomfirmware.h" #include "vega10/soc15ip.h" #include "vega10/HDP/hdp_4_0_offset.h" #include "vega10/HDP/hdp_4_0_sh_mask.h" #include "vega10/GC/gc_9_0_sh_mask.h" +#include "vega10/DC/dce_12_0_offset.h" +#include "vega10/DC/dce_12_0_sh_mask.h" #include "vega10/vega10_enum.h" #include "soc15_common.h" @@ -419,8 +422,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, if (!amdgpu_sriov_vf(adev)) base = mmhub_v1_0_get_fb_location(adev); amdgpu_vram_location(adev, &adev->mc, base); - adev->mc.gtt_base_align = 0; - amdgpu_gtt_location(adev, mc); + amdgpu_gart_location(adev, mc); /* base offset of vram pages */ if (adev->flags & AMD_IS_APU) adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); @@ -442,43 +444,46 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) u32 tmp; int chansize, numchan; - /* hbm memory channel size */ - chansize = 128; - - tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); - tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; - tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; - switch (tmp) { - case 0: - default: - numchan = 1; - break; - case 1: - numchan = 2; - break; - case 2: - numchan = 0; - break; - case 3: - numchan = 4; - break; - case 4: - numchan = 0; - break; - case 5: - numchan = 8; - break; - case 6: - numchan = 0; - break; - case 7: - numchan = 16; - break; - case 8: - numchan = 2; - break; + adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); + if (!adev->mc.vram_width) { + /* hbm memory channel size */ + chansize = 128; + + tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); + tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; + tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; + switch (tmp) { + case 0: + default: + numchan = 1; + break; + case 1: + numchan = 2; + break; + case 2: + numchan = 0; + break; + case 3: + numchan = 4; + break; + case 4: + numchan = 0; + break; + case 5: + numchan = 8; + break; + case 6: + numchan = 0; + break; + case 7: + numchan = 16; + break; + case 8: + numchan = 2; + break; + } + adev->mc.vram_width = numchan * chansize; } - adev->mc.vram_width = numchan * chansize; /* Could aper size report 0 ? */ adev->mc.aper_base = pci_resource_start(adev->pdev, 0); @@ -494,15 +499,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) if (adev->mc.visible_vram_size > adev->mc.real_vram_size) adev->mc.visible_vram_size = adev->mc.real_vram_size; - /* unless the user had overridden it, set the gart - * size equal to the 1024 or vram, whichever is larger. - */ - if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->mc.mc_vram_size); - else - adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; - + amdgpu_gart_set_defaults(adev); gmc_v9_0_vram_gtt_location(adev, &adev->mc); return 0; @@ -537,10 +534,21 @@ static int gmc_v9_0_sw_init(void *handle) spin_lock_init(&adev->mc.invalidate_lock); - if (adev->flags & AMD_IS_APU) { + switch (adev->asic_type) { + case CHIP_RAVEN: adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; - amdgpu_vm_adjust_size(adev, 64); - } else { + if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { + adev->vm_manager.vm_size = 1U << 18; + adev->vm_manager.block_size = 9; + adev->vm_manager.num_level = 3; + amdgpu_vm_set_fragment_size(adev, 9); + } else { + /* vm_size is 64GB for legacy 2-level page support */ + amdgpu_vm_adjust_size(adev, 64, 9); + adev->vm_manager.num_level = 1; + } + break; + case CHIP_VEGA10: /* XXX Don't know how to get VRAM type yet. */ adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM; /* @@ -550,11 +558,18 @@ static int gmc_v9_0_sw_init(void *handle) */ adev->vm_manager.vm_size = 1U << 18; adev->vm_manager.block_size = 9; - DRM_INFO("vm size is %llu GB, block size is %u-bit\n", - adev->vm_manager.vm_size, - adev->vm_manager.block_size); + adev->vm_manager.num_level = 3; + amdgpu_vm_set_fragment_size(adev, 9); + break; + default: + break; } + DRM_INFO("vm size is %llu GB, block size is %u-bit,fragment size is %u-bit\n", + adev->vm_manager.vm_size, + adev->vm_manager.block_size, + adev->vm_manager.fragment_size); + /* This interrupt is VMC page fault.*/ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, &adev->mc.vm_fault); @@ -619,11 +634,6 @@ static int gmc_v9_0_sw_init(void *handle) adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; - /* TODO: fix num_level for APU when updating vm size and block size */ - if (adev->flags & AMD_IS_APU) - adev->vm_manager.num_level = 1; - else - adev->vm_manager.num_level = 3; amdgpu_vm_manager_init(adev); return 0; @@ -731,7 +741,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) gmc_v9_0_gart_flush_gpu_tlb(adev, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->mc.gtt_size >> 20), + (unsigned)(adev->mc.gart_size >> 20), (unsigned long long)adev->gart.table_addr); adev->gart.ready = true; return 0; @@ -745,6 +755,20 @@ static int gmc_v9_0_hw_init(void *handle) /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); + if (adev->mode_info.num_crtc) { + u32 tmp; + + /* Lockout access through VGA aperture*/ + tmp = RREG32_SOC15(DCE, 0, mmVGA_HDP_CONTROL); + tmp = REG_SET_FIELD(tmp, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1); + WREG32_SOC15(DCE, 0, mmVGA_HDP_CONTROL, tmp); + + /* disable VGA render */ + tmp = RREG32_SOC15(DCE, 0, mmVGA_RENDER_CONTROL); + tmp = REG_SET_FIELD(tmp, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); + WREG32_SOC15(DCE, 0, mmVGA_RENDER_CONTROL, tmp); + } + r = gmc_v9_0_gart_enable(adev); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 9804318f3488cf67eaf357756dd239425fc27457..4395a4f12149c37db23404fa89077d783aa88c06 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -69,14 +69,14 @@ static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) mmhub_v1_0_init_gart_pt_regs(adev); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, - (u32)(adev->mc.gtt_start >> 12)); + (u32)(adev->mc.gart_start >> 12)); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, - (u32)(adev->mc.gtt_start >> 44)); + (u32)(adev->mc.gart_start >> 44)); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, - (u32)(adev->mc.gtt_end >> 12)); + (u32)(adev->mc.gart_end >> 12)); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, - (u32)(adev->mc.gtt_end >> 44)); + (u32)(adev->mc.gart_end >> 44)); } static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) @@ -138,12 +138,12 @@ static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) { - uint32_t tmp; + uint32_t tmp, field; /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 1); /* XXX for emulation, Refer to closed source code.*/ tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); @@ -157,7 +157,10 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); + field = adev->vm_manager.fragment_size; tmp = mmVM_L2_CNTL3_DEFAULT; + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, field); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); tmp = mmVM_L2_CNTL4_DEFAULT; @@ -222,6 +225,9 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); @@ -245,28 +251,28 @@ static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev) } struct pctl_data { - uint32_t index; - uint32_t data; + uint32_t index; + uint32_t data; }; -const struct pctl_data pctl0_data[] = { - {0x0, 0x7a640}, - {0x9, 0x2a64a}, - {0xd, 0x2a680}, - {0x11, 0x6a684}, - {0x19, 0xea68e}, - {0x29, 0xa69e}, - {0x2b, 0x34a6c0}, - {0x61, 0x83a707}, - {0xe6, 0x8a7a4}, - {0xf0, 0x1a7b8}, - {0xf3, 0xfa7cc}, - {0x104, 0x17a7dd}, - {0x11d, 0xa7dc}, - {0x11f, 0x12a7f5}, - {0x133, 0xa808}, - {0x135, 0x12a810}, - {0x149, 0x7a82c} +static const struct pctl_data pctl0_data[] = { + {0x0, 0x7a640}, + {0x9, 0x2a64a}, + {0xd, 0x2a680}, + {0x11, 0x6a684}, + {0x19, 0xea68e}, + {0x29, 0xa69e}, + {0x2b, 0x34a6c0}, + {0x61, 0x83a707}, + {0xe6, 0x8a7a4}, + {0xf0, 0x1a7b8}, + {0xf3, 0xfa7cc}, + {0x104, 0x17a7dd}, + {0x11d, 0xa7dc}, + {0x11f, 0x12a7f5}, + {0x133, 0xa808}, + {0x135, 0x12a810}, + {0x149, 0x7a82c} }; #define PCTL0_DATA_LEN (sizeof(pctl0_data)/sizeof(pctl0_data[0])) @@ -274,32 +280,39 @@ const struct pctl_data pctl0_data[] = { #define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640 #define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833 -const struct pctl_data pctl1_data[] = { - {0x0, 0x39a000}, - {0x3b, 0x44a040}, - {0x81, 0x2a08d}, - {0x85, 0x6ba094}, - {0xf2, 0x18a100}, - {0x10c, 0x4a132}, - {0x112, 0xca141}, - {0x120, 0x2fa158}, - {0x151, 0x17a1d0}, - {0x16a, 0x1a1e9}, - {0x16d, 0x13a1ec}, - {0x182, 0x7a201}, - {0x18b, 0x3a20a}, - {0x190, 0x7a580}, - {0x199, 0xa590}, - {0x19b, 0x4a594}, - {0x1a1, 0x1a59c}, - {0x1a4, 0x7a82c}, - {0x1ad, 0xfa7cc}, - {0x1be, 0x17a7dd}, - {0x1d7, 0x12a810} +static const struct pctl_data pctl1_data[] = { + {0x0, 0x39a000}, + {0x3b, 0x44a040}, + {0x81, 0x2a08d}, + {0x85, 0x6ba094}, + {0xf2, 0x18a100}, + {0x10c, 0x4a132}, + {0x112, 0xca141}, + {0x120, 0x2fa158}, + {0x151, 0x17a1d0}, + {0x16a, 0x1a1e9}, + {0x16d, 0x13a1ec}, + {0x182, 0x7a201}, + {0x18b, 0x3a20a}, + {0x190, 0x7a580}, + {0x199, 0xa590}, + {0x19b, 0x4a594}, + {0x1a1, 0x1a59c}, + {0x1a4, 0x7a82c}, + {0x1ad, 0xfa7cc}, + {0x1be, 0x17a7dd}, + {0x1d7, 0x12a810}, + {0x1eb, 0x4000a7e1}, + {0x1ec, 0x5000a7f5}, + {0x1ed, 0x4000a7e2}, + {0x1ee, 0x5000a7dc}, + {0x1ef, 0x4000a7e3}, + {0x1f0, 0x5000a7f6}, + {0x1f1, 0x5000a7e4} }; #define PCTL1_DATA_LEN (sizeof(pctl1_data)/sizeof(pctl1_data[0])) -#define PCTL1_RENG_EXEC_END_PTR 0x1ea +#define PCTL1_RENG_EXEC_END_PTR 0x1f1 #define PCTL1_STCTRL_REG_SAVE_RANGE0_BASE 0xa000 #define PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa20d #define PCTL1_STCTRL_REG_SAVE_RANGE1_BASE 0xa580 diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h index 57bb940c0ecd8300b7f646cc10865205edf427a1..5d38229baf6981d6a3830e04de5e87ba981ff97a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h @@ -36,7 +36,4 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev); void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, bool enable); -extern const struct amd_ip_funcs mmhub_v1_0_ip_funcs; -extern const struct amdgpu_ip_block_version mmhub_v1_0_ip_block; - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index bde3ca3c21c190fb526a3f0fe7a1706122a71f59..2812d88a8bdd480371b9ad93ced23322e581e9c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -72,21 +72,6 @@ static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val) reg); } -static void xgpu_ai_mailbox_trans_msg(struct amdgpu_device *adev, - enum idh_request req) -{ - u32 reg; - - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0)); - reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0, - MSGBUF_DATA, req); - WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0), - reg); - - xgpu_ai_mailbox_set_valid(adev, true); -} - static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev, enum idh_event event) { @@ -154,13 +139,25 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event) return r; } - -static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, - enum idh_request req) -{ +static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev, + enum idh_request req, u32 data1, u32 data2, u32 data3) { + u32 reg; int r; - xgpu_ai_mailbox_trans_msg(adev, req); + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0)); + reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0, + MSGBUF_DATA, req); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0), + reg); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW1), + data1); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW2), + data2); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW3), + data3); + + xgpu_ai_mailbox_set_valid(adev, true); /* start to poll ack */ r = xgpu_ai_poll_ack(adev); @@ -168,6 +165,14 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, pr_err("Doesn't get ack from pf, continue\n"); xgpu_ai_mailbox_set_valid(adev, false); +} + +static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, + enum idh_request req) +{ + int r; + + xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0); /* start to check msg if request is idh_req_gpu_init_access */ if (req == IDH_REQ_GPU_INIT_ACCESS || @@ -342,4 +347,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .req_full_gpu = xgpu_ai_request_full_gpu_access, .rel_full_gpu = xgpu_ai_release_full_gpu_access, .reset_gpu = xgpu_ai_request_reset, + .trans_msg = xgpu_ai_mailbox_trans_msg, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 9aefc44d2c34474296c17d83dc7a7d9100a6171f..1e91b9a1c59150f6bc86101cb0339bdb3a0abf5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -31,7 +31,9 @@ enum idh_request { IDH_REL_GPU_INIT_ACCESS, IDH_REQ_GPU_FINI_ACCESS, IDH_REL_GPU_FINI_ACCESS, - IDH_REQ_GPU_RESET_ACCESS + IDH_REQ_GPU_RESET_ACCESS, + + IDH_LOG_VF_ERROR = 200, }; enum idh_event { diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 171a658135b50795d3420eb6f50bd195fb79d35c..c25a831f94ecf65b8a8a6bd6653b5d188ebb42c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -613,4 +613,5 @@ const struct amdgpu_virt_ops xgpu_vi_virt_ops = { .req_full_gpu = xgpu_vi_request_full_gpu_access, .rel_full_gpu = xgpu_vi_release_full_gpu_access, .reset_gpu = xgpu_vi_request_reset, + .trans_msg = NULL, /* Does not need to trans VF errors to host. */ }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h index 2db741131bc6fac7efc035dbbe962b7c404f6e2e..c791d73d2d542cc368def9b227ebff3c1a6e8e62 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h @@ -32,7 +32,9 @@ enum idh_request { IDH_REL_GPU_INIT_ACCESS, IDH_REQ_GPU_FINI_ACCESS, IDH_REL_GPU_FINI_ACCESS, - IDH_REQ_GPU_RESET_ACCESS + IDH_REQ_GPU_RESET_ACCESS, + + IDH_LOG_VF_ERROR = 200, }; /* VI mailbox messages data */ diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 1e272f785def74e40b3019b3aeb1ef69a957ed0d..045988b18bc3361fff6b52f6e38f499d23c9090c 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -32,6 +32,7 @@ #define smnCPM_CONTROL 0x11180460 #define smnPCIE_CNTL2 0x11180070 +#define smnPCIE_CONFIG_CNTL 0x11180044 u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) { @@ -67,7 +68,7 @@ void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) { - WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); } u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) @@ -256,3 +257,15 @@ void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } } + +void nbio_v6_1_init_registers(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_CONFIG_CNTL); + data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); + data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + + if (def != data) + WREG32_PCIE(smnPCIE_CONFIG_CNTL, data); +} diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h index f6f8bc0455189bb8766f90cf55cdcdff18526805..686e4b4d296a7bb02d18a540b0a80afdc1b4ad4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h @@ -50,5 +50,6 @@ void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable); void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags); void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev); +void nbio_v6_1_init_registers(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index aa04632523fa8e4457d0674804d8eef98e6faf10..11b70d601922e170efe6b5de950953df7a5eacdc 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -65,7 +65,7 @@ void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) void nbio_v7_0_hdp_flush(struct amdgpu_device *adev) { - WREG32_SOC15(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); } u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 2258323a3c2698a18b6c093bd5143eff98f9c963..f7cf994b1da2871a4437655f8306cf9bcb284468 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -86,6 +86,52 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type * return 0; } +int psp_v10_0_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + const char *chip_name; + char fw_name[30]; + int err = 0; + const struct psp_firmware_header_v1_0 *hdr; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_RAVEN: + chip_name = "raven"; + break; + default: BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name); + err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev); + if (err) + goto out; + + err = amdgpu_ucode_validate(adev->psp.asd_fw); + if (err) + goto out; + + hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data; + adev->psp.asd_fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->psp.asd_feature_version = le32_to_cpu(hdr->ucode_feature_version); + adev->psp.asd_ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes); + adev->psp.asd_start_addr = (uint8_t *)hdr + + le32_to_cpu(hdr->header.ucode_array_offset_bytes); + + return 0; +out: + if (err) { + dev_err(adev->dev, + "psp v10.0: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->psp.asd_fw); + adev->psp.asd_fw = NULL; + } + + return err; +} + int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) { int ret; @@ -110,7 +156,6 @@ int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cm int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) { int ret = 0; - unsigned int psp_ring_reg = 0; struct psp_ring *ring; struct amdgpu_device *adev = psp->adev; @@ -130,6 +175,16 @@ int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) return ret; } + return 0; +} + +int psp_v10_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + /* Write low address of the ring to C2PMSG_69 */ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); @@ -143,13 +198,42 @@ int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) psp_ring_reg = ring_type; psp_ring_reg = psp_ring_reg << 16; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* There might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) in C2PMSG_64 */ - psp_ring_reg = 0; - while ((psp_ring_reg & 0x80000000) == 0) { - psp_ring_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64); - } + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); - return 0; + return ret; +} + +int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + unsigned int psp_ring_reg = 0; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + /* Write the ring destroy command to C2PMSG_64 */ + psp_ring_reg = 3 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* There might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; } int psp_v10_0_cmd_submit(struct psp_context *psp, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h index 2022b7b7151e9c374be77189c7eb4bbae20546db..e76cde2f01f9521f5a13509c17687cb532c08b9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h @@ -27,10 +27,15 @@ #include "amdgpu_psp.h" +extern int psp_v10_0_init_microcode(struct psp_context *psp); extern int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd); extern int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type); +extern int psp_v10_0_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type); +extern int psp_v10_0_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type); extern int psp_v10_0_cmd_submit(struct psp_context *psp, struct amdgpu_firmware_info *ucode, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index c98d77d0c8f84a064fa16d82ed1f57fedb9723be..2a535a4b8d5b3263dde9158f09be3c8d5547ce12 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -237,11 +237,9 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); -#if 0 ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, true); -#endif return ret; } @@ -341,10 +339,10 @@ int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type) ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), 0x80000000, 0x80000000, false); - if (ring->ring_mem) - amdgpu_bo_free_kernel(&adev->firmware.rbuf, - &ring->ring_mem_mc_addr, - (void **)&ring->ring_mem); + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 1d766ae98dc8d90c85cead54b966bef91c000f87..b1de44f2282490189f0c4024d3d75711512547fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -551,17 +551,53 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev) */ static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) { - u32 f32_cntl; + u32 f32_cntl, phase_quantum = 0; int i; + if (amdgpu_sdma_phase_quantum) { + unsigned value = amdgpu_sdma_phase_quantum; + unsigned unit = 0; + + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) { + value = (value + 1) >> 1; + unit++; + } + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) { + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT); + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT); + WARN_ONCE(1, + "clamping sdma_phase_quantum to %uK clock cycles\n", + value << unit); + } + phase_quantum = + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; + } + for (i = 0; i < adev->sdma.num_instances; i++) { f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]); - if (enable) + if (enable) { f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, 1); - else + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + ATC_L1_ENABLE, 1); + if (amdgpu_sdma_phase_quantum) { + WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i], + phase_quantum); + WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i], + phase_quantum); + } + } else { f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, 0); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + ATC_L1_ENABLE, 1); + } + WREG32(mmSDMA0_CNTL + sdma_offsets[i], f32_cntl); } } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 4a65697ccc942ec1b2b7393acbcf360274bb8e13..fd7c72aaafa6248a566996140128a1fc22569279 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -291,6 +291,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("Setting write pointer\n"); if (ring->use_doorbell) { + u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs]; + DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " "lower_32_bits(ring->wptr) << 2 == 0x%08x " @@ -299,8 +301,7 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2); - adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2); + WRITE_ONCE(*wb, (ring->wptr << 2)); DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); @@ -493,13 +494,45 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) */ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) { - u32 f32_cntl; + u32 f32_cntl, phase_quantum = 0; int i; + if (amdgpu_sdma_phase_quantum) { + unsigned value = amdgpu_sdma_phase_quantum; + unsigned unit = 0; + + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) { + value = (value + 1) >> 1; + unit++; + } + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) { + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT); + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT); + WARN_ONCE(1, + "clamping sdma_phase_quantum to %uK clock cycles\n", + value << unit); + } + phase_quantum = + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT | + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT; + } + for (i = 0; i < adev->sdma.num_instances; i++) { f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL)); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, enable ? 1 : 0); + if (enable && amdgpu_sdma_phase_quantum) { + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE0_QUANTUM), + phase_quantum); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE1_QUANTUM), + phase_quantum); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE2_QUANTUM), + phase_quantum); + } WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), f32_cntl); } @@ -541,12 +574,13 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - u32 rb_cntl, ib_cntl; + u32 rb_cntl, ib_cntl, wptr_poll_cntl; u32 rb_bufsz; u32 wb_offset; u32 doorbell; u32 doorbell_offset; u32 temp; + u64 wptr_gpu_addr; int i, r; for (i = 0; i < adev->sdma.num_instances; i++) { @@ -628,6 +662,19 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), temp); } + /* setup the wptr shadow polling */ + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); + if (amdgpu_sriov_vf(adev)) + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); + else + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); + /* enable DMA RB */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); @@ -655,6 +702,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); + } return 0; @@ -751,15 +799,12 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) const struct sdma_firmware_header_v1_0 *hdr; const __le32 *fw_data; u32 fw_size; - u32 digest_size = 0; int i, j; /* halt the MEs */ sdma_v4_0_enable(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { - uint16_t version_major; - uint16_t version_minor; if (!adev->sdma.instance[i].fw) return -EINVAL; @@ -767,23 +812,12 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) amdgpu_ucode_print_sdma_hdr(&hdr->header); fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; - version_major = le16_to_cpu(hdr->header.header_version_major); - version_minor = le16_to_cpu(hdr->header.header_version_minor); - - if (version_major == 1 && version_minor >= 1) { - const struct sdma_firmware_header_v1_1 *sdma_v1_1_hdr = (const struct sdma_firmware_header_v1_1 *) hdr; - digest_size = le32_to_cpu(sdma_v1_1_hdr->digest_size); - } - - fw_size -= digest_size; - fw_data = (const __le32 *) (adev->sdma.instance[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), 0); - for (j = 0; j < fw_size; j++) WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 4267fa417997aeaf6336297f25a05c31c7ae4eb8..8284d5dbfc30c5227088de36d269459e3c3511b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1150,6 +1150,33 @@ static bool si_read_disabled_bios(struct amdgpu_device *adev) return r; } +#define mmROM_INDEX 0x2A +#define mmROM_DATA 0x2B + +static bool si_read_bios_from_rom(struct amdgpu_device *adev, + u8 *bios, u32 length_bytes) +{ + u32 *dw_ptr; + u32 i, length_dw; + + if (bios == NULL) + return false; + if (length_bytes == 0) + return false; + /* APU vbios image is part of sbios image */ + if (adev->flags & AMD_IS_APU) + return false; + + dw_ptr = (u32 *)bios; + length_dw = ALIGN(length_bytes, 4) / 4; + /* set rom index to 0 */ + WREG32(mmROM_INDEX, 0); + for (i = 0; i < length_dw; i++) + dw_ptr[i] = RREG32(mmROM_DATA); + + return true; +} + //xxx: not implemented static int si_asic_reset(struct amdgpu_device *adev) { @@ -1206,6 +1233,7 @@ static void si_detect_hw_virtualization(struct amdgpu_device *adev) static const struct amdgpu_asic_funcs si_asic_funcs = { .read_disabled_bios = &si_read_disabled_bios, + .read_bios_from_rom = &si_read_bios_from_rom, .read_register = &si_read_register, .reset = &si_asic_reset, .set_vga_state = &si_vga_set_state, diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index a7ad8390981c7cfb38b9e530fe00d4c8d68f034c..d63873f3f57436cdda4a76ad542f4512a55b0b7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -2055,6 +2055,7 @@ static void si_initialize_powertune_defaults(struct amdgpu_device *adev) case 0x682C: si_pi->cac_weights = cac_weights_cape_verde_pro; si_pi->dte_data = dte_data_sun_xt; + update_dte_from_pl2 = true; break; case 0x6825: case 0x6827: diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index a7341d88a32024728866e728c689cd946c1c099d..f2c3a49f73a0051b74c602990d6e18ea64928eda 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -25,7 +25,7 @@ #include #include #include "amdgpu.h" -#include "amdgpu_atomfirmware.h" +#include "amdgpu_atombios.h" #include "amdgpu_ih.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" @@ -62,8 +62,6 @@ #include "dce_virtual.h" #include "mxgpu_ai.h" -MODULE_FIRMWARE("amdgpu/vega10_smc.bin"); - #define mmFabricConfigAccessControl 0x0410 #define mmFabricConfigAccessControl_BASE_IDX 0 #define mmFabricConfigAccessControl_DEFAULT 0x00000000 @@ -198,6 +196,50 @@ static void soc15_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) spin_unlock_irqrestore(&adev->didt_idx_lock, flags); } +static u32 soc15_gc_cac_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&adev->gc_cac_idx_lock, flags); + WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, (reg)); + r = RREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA); + spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags); + return r; +} + +static void soc15_gc_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&adev->gc_cac_idx_lock, flags); + WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, (reg)); + WREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA, (v)); + spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags); +} + +static u32 soc15_se_cac_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&adev->se_cac_idx_lock, flags); + WREG32_SOC15(GC, 0, mmSE_CAC_IND_INDEX, (reg)); + r = RREG32_SOC15(GC, 0, mmSE_CAC_IND_DATA); + spin_unlock_irqrestore(&adev->se_cac_idx_lock, flags); + return r; +} + +static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&adev->se_cac_idx_lock, flags); + WREG32_SOC15(GC, 0, mmSE_CAC_IND_INDEX, (reg)); + WREG32_SOC15(GC, 0, mmSE_CAC_IND_DATA, (v)); + spin_unlock_irqrestore(&adev->se_cac_idx_lock, flags); +} + static u32 soc15_get_config_memsize(struct amdgpu_device *adev) { if (adev->flags & AMD_IS_APU) @@ -392,11 +434,11 @@ static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev) static int soc15_asic_reset(struct amdgpu_device *adev) { - amdgpu_atomfirmware_scratch_regs_engine_hung(adev, true); + amdgpu_atombios_scratch_regs_engine_hung(adev, true); soc15_gpu_pci_config_reset(adev); - amdgpu_atomfirmware_scratch_regs_engine_hung(adev, false); + amdgpu_atombios_scratch_regs_engine_hung(adev, false); return 0; } @@ -524,13 +566,6 @@ static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) return nbio_v6_1_get_rev_id(adev); } - -int gmc_v9_0_mc_wait_for_idle(struct amdgpu_device *adev) -{ - /* to be implemented in MC IP*/ - return 0; -} - static const struct amdgpu_asic_funcs soc15_asic_funcs = { .read_disabled_bios = &soc15_read_disabled_bios, @@ -557,6 +592,10 @@ static int soc15_common_early_init(void *handle) adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg; adev->didt_rreg = &soc15_didt_rreg; adev->didt_wreg = &soc15_didt_wreg; + adev->gc_cac_rreg = &soc15_gc_cac_rreg; + adev->gc_cac_wreg = &soc15_gc_cac_wreg; + adev->se_cac_rreg = &soc15_se_cac_rreg; + adev->se_cac_wreg = &soc15_se_cac_wreg; adev->asic_funcs = &soc15_asic_funcs; @@ -681,6 +720,9 @@ static int soc15_common_hw_init(void *handle) soc15_pcie_gen3_enable(adev); /* enable aspm */ soc15_program_aspm(adev); + /* setup nbio registers */ + if (!(adev->flags & AMD_IS_APU)) + nbio_v6_1_init_registers(adev); /* enable the doorbell aperture */ soc15_enable_doorbell_aperture(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index e2d330eed952cade98142d943ce2ceafa3c5e461..7a8e4e28abb2a9bf3b68933f80c21596920c4781 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -77,6 +77,13 @@ struct nbio_pcie_index_data { (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ (ip##_BASE__INST##inst##_SEG4 + reg))))), value) +#define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \ + WREG32_NO_KIQ( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ + (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ + (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ + (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ + (ip##_BASE__INST##inst##_SEG4 + reg))))), value) + #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index e79befd80eed98ff4d9ee702ba5d6945ca132f44..7f408f85fdb631ee7bccda52ec444a7ec87942f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -250,6 +250,7 @@ #define PACKET3_SET_UCONFIG_REG 0x79 #define PACKET3_SET_UCONFIG_REG_START 0x0000c000 #define PACKET3_SET_UCONFIG_REG_END 0x0000c400 +#define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28) #define PACKET3_SCRATCH_RAM_WRITE 0x7D #define PACKET3_SCRATCH_RAM_READ 0x7E #define PACKET3_LOAD_CONST_RAM 0x80 diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 987b958368acdb411f63f9a73edb71da97aa2a7b..23a85750edd6fba1c591709dbededb92e54dfec6 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -165,6 +165,9 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; + if (amdgpu_sriov_vf(adev)) + return 0; + r = amdgpu_ring_alloc(ring, 16); if (r) { DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n", @@ -432,13 +435,19 @@ static int uvd_v7_0_sw_init(void *handle) return r; } - for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.ring_enc[i]; sprintf(ring->name, "uvd_enc%d", i); if (amdgpu_sriov_vf(adev)) { ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; + + /* currently only use the first enconding ring for + * sriov, so set unused location for other unused rings. + */ + if (i == 0) + ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; + else + ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; } r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); if (r) @@ -685,6 +694,11 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, /* 4, set resp to zero */ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0); + WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0); + adev->wb.wb[adev->uvd.ring_enc[0].wptr_offs] = 0; + adev->uvd.ring_enc[0].wptr = 0; + adev->uvd.ring_enc[0].wptr_old = 0; + /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001); @@ -702,7 +716,6 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); return -EBUSY; } - WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0); return 0; } @@ -736,11 +749,9 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) init_table += header->uvd_table_offset; ring = &adev->uvd.ring; + ring->wptr = 0; size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); - /* disable clock gating */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), - ~UVD_POWER_STATUS__UVD_PG_MODE_MASK, 0); MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0xFFFFFFFF, 0x00000004); /* mc resume*/ @@ -777,12 +788,6 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_ADDR_CONFIG), - adev->gfx.config.gb_addr_config); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG), - adev->gfx.config.gb_addr_config); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG), - adev->gfx.config.gb_addr_config); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); /* mc resume end*/ @@ -819,17 +824,6 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) UVD_LMI_CTRL__REQ_MODE_MASK | 0x00100000L)); - /* disable byte swapping */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_SWAP_CNTL), 0); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MP_SWAP_CNTL), 0); - - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA0), 0x40c2040); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA1), 0x0); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB0), 0x40c2040); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB1), 0x0); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_ALU), 0); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUX), 0x88); - /* take all subblocks out of reset, except VCPU */ MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); @@ -838,15 +832,6 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), UVD_VCPU_CNTL__CLK_EN_MASK); - /* enable UMC */ - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); - - /* boot up the VCPU */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); - - MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02); - /* enable master interrupt */ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), @@ -859,40 +844,31 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) /* force RBC into idle state */ size = order_base_2(ring->ring_size); tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); - /* set the write pointer delay */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL), 0); - - /* set the wb address */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR), - (upper_32_bits(ring->gpu_addr) >> 2)); - - /* programm the RB_BASE for ring buffer */ - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), - lower_32_bits(ring->gpu_addr)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), - upper_32_bits(ring->gpu_addr)); - - ring->wptr = 0; ring = &adev->uvd.ring_enc[0]; + ring->wptr = 0; MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); + /* boot up the VCPU */ + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); + + /* enable UMC */ + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0); + + MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02); + /* add end packet */ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; header->uvd_table_size = table_size; - return uvd_v7_0_mmsch_start(adev, &adev->virt.mm_table); } - return -EINVAL; /* already initializaed ? */ + return uvd_v7_0_mmsch_start(adev, &adev->virt.mm_table); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 1ecd6bb90c1f2b8372b160ce0b1fed8fa0263e28..11134d5f744335fcaaf9987b288d8127feb52ec8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -173,6 +173,11 @@ static int vce_v4_0_mmsch_start(struct amdgpu_device *adev, /* 4, set resp to zero */ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0); + WDOORBELL32(adev->vce.ring[0].doorbell_index, 0); + adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0; + adev->vce.ring[0].wptr = 0; + adev->vce.ring[0].wptr_old = 0; + /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001); @@ -190,7 +195,6 @@ static int vce_v4_0_mmsch_start(struct amdgpu_device *adev, dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); return -EBUSY; } - WDOORBELL32(adev->vce.ring[0].doorbell_index, 0); return 0; } @@ -274,7 +278,8 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0); MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), - 0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); + VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK, + VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); /* end of MC_RESUME */ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), @@ -296,11 +301,9 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; header->vce_table_size = table_size; - - return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table); } - return -EINVAL; /* already initializaed ? */ + return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table); } /** @@ -443,12 +446,14 @@ static int vce_v4_0_sw_init(void *handle) if (amdgpu_sriov_vf(adev)) { /* DOORBELL only works under SRIOV */ ring->use_doorbell = true; + + /* currently only use the first encoding ring for sriov, + * so set unused location for other unused rings. + */ if (i == 0) - ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2; - else if (i == 1) - ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2; + ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2; else - ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1; + ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1; } r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0); if (r) @@ -990,11 +995,13 @@ static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, { uint32_t val = 0; - if (state == AMDGPU_IRQ_STATE_ENABLE) - val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK; + if (!amdgpu_sriov_vf(adev)) { + if (state == AMDGPU_IRQ_STATE_ENABLE) + val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK; - WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val, - ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val, + ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 6cac291c96da207aec10e971d9cae70ad62aed71..9ff69b90df363c4ee95472e2133f4f6d2059996e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1028,8 +1028,7 @@ static int vi_common_early_init(void *handle) /* rev0 hardware requires workarounds to support PG */ adev->pg_flags = 0; if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) { - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | - AMD_PG_SUPPORT_GFX_SMG | + adev->pg_flags |= AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_PIPELINE | AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_UVD | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 6316aad43a737cbeb8595c0f8f76fdb2450045b3..e4a8c2e52cb2c14ab566d015b178a2e6bd243cac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -142,12 +142,12 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, struct kfd_ioctl_create_queue_args *args) { if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { - pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); + pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); return -EINVAL; } if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { - pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); + pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); return -EINVAL; } @@ -155,26 +155,26 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, (!access_ok(VERIFY_WRITE, (const void __user *) args->ring_base_address, sizeof(uint64_t)))) { - pr_err("kfd: can't access ring base address\n"); + pr_err("Can't access ring base address\n"); return -EFAULT; } if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { - pr_err("kfd: ring size must be a power of 2 or 0\n"); + pr_err("Ring size must be a power of 2 or 0\n"); return -EINVAL; } if (!access_ok(VERIFY_WRITE, (const void __user *) args->read_pointer_address, sizeof(uint32_t))) { - pr_err("kfd: can't access read pointer\n"); + pr_err("Can't access read pointer\n"); return -EFAULT; } if (!access_ok(VERIFY_WRITE, (const void __user *) args->write_pointer_address, sizeof(uint32_t))) { - pr_err("kfd: can't access write pointer\n"); + pr_err("Can't access write pointer\n"); return -EFAULT; } @@ -182,7 +182,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, !access_ok(VERIFY_WRITE, (const void __user *) args->eop_buffer_address, sizeof(uint32_t))) { - pr_debug("kfd: can't access eop buffer"); + pr_debug("Can't access eop buffer"); return -EFAULT; } @@ -190,7 +190,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, !access_ok(VERIFY_WRITE, (const void __user *) args->ctx_save_restore_address, sizeof(uint32_t))) { - pr_debug("kfd: can't access ctx save restore buffer"); + pr_debug("Can't access ctx save restore buffer"); return -EFAULT; } @@ -219,27 +219,27 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, else q_properties->format = KFD_QUEUE_FORMAT_PM4; - pr_debug("Queue Percentage (%d, %d)\n", + pr_debug("Queue Percentage: %d, %d\n", q_properties->queue_percent, args->queue_percentage); - pr_debug("Queue Priority (%d, %d)\n", + pr_debug("Queue Priority: %d, %d\n", q_properties->priority, args->queue_priority); - pr_debug("Queue Address (0x%llX, 0x%llX)\n", + pr_debug("Queue Address: 0x%llX, 0x%llX\n", q_properties->queue_address, args->ring_base_address); - pr_debug("Queue Size (0x%llX, %u)\n", + pr_debug("Queue Size: 0x%llX, %u\n", q_properties->queue_size, args->ring_size); - pr_debug("Queue r/w Pointers (0x%llX, 0x%llX)\n", - (uint64_t) q_properties->read_ptr, - (uint64_t) q_properties->write_ptr); + pr_debug("Queue r/w Pointers: %p, %p\n", + q_properties->read_ptr, + q_properties->write_ptr); - pr_debug("Queue Format (%d)\n", q_properties->format); + pr_debug("Queue Format: %d\n", q_properties->format); - pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address); + pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address); - pr_debug("Queue CTX save arex (0x%llX)\n", + pr_debug("Queue CTX save area: 0x%llX\n", q_properties->ctx_save_restore_area_address); return 0; @@ -257,16 +257,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, memset(&q_properties, 0, sizeof(struct queue_properties)); - pr_debug("kfd: creating queue ioctl\n"); + pr_debug("Creating queue ioctl\n"); err = set_queue_properties_from_user(&q_properties, args); if (err) return err; - pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id); + pr_debug("Looking for gpu id 0x%x\n", args->gpu_id); dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) { - pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id); + if (!dev) { + pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); return -EINVAL; } @@ -278,7 +278,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, goto err_bind_process; } - pr_debug("kfd: creating queue for PASID %d on GPU 0x%x\n", + pr_debug("Creating queue for PASID %d on gpu 0x%x\n", p->pasid, dev->id); @@ -296,15 +296,15 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, mutex_unlock(&p->mutex); - pr_debug("kfd: queue id %d was created successfully\n", args->queue_id); + pr_debug("Queue id %d was created successfully\n", args->queue_id); - pr_debug("ring buffer address == 0x%016llX\n", + pr_debug("Ring buffer address == 0x%016llX\n", args->ring_base_address); - pr_debug("read ptr address == 0x%016llX\n", + pr_debug("Read ptr address == 0x%016llX\n", args->read_pointer_address); - pr_debug("write ptr address == 0x%016llX\n", + pr_debug("Write ptr address == 0x%016llX\n", args->write_pointer_address); return 0; @@ -321,7 +321,7 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, int retval; struct kfd_ioctl_destroy_queue_args *args = data; - pr_debug("kfd: destroying queue id %d for PASID %d\n", + pr_debug("Destroying queue id %d for pasid %d\n", args->queue_id, p->pasid); @@ -341,12 +341,12 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, struct queue_properties properties; if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { - pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); + pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); return -EINVAL; } if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { - pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); + pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); return -EINVAL; } @@ -354,12 +354,12 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, (!access_ok(VERIFY_WRITE, (const void __user *) args->ring_base_address, sizeof(uint64_t)))) { - pr_err("kfd: can't access ring base address\n"); + pr_err("Can't access ring base address\n"); return -EFAULT; } if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { - pr_err("kfd: ring size must be a power of 2 or 0\n"); + pr_err("Ring size must be a power of 2 or 0\n"); return -EINVAL; } @@ -368,7 +368,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, properties.queue_percent = args->queue_percentage; properties.priority = args->queue_priority; - pr_debug("kfd: updating queue id %d for PASID %d\n", + pr_debug("Updating queue id %d for pasid %d\n", args->queue_id, p->pasid); mutex_lock(&p->mutex); @@ -400,7 +400,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep, } dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) + if (!dev) return -EINVAL; mutex_lock(&p->mutex); @@ -443,7 +443,7 @@ static int kfd_ioctl_dbg_register(struct file *filep, long status = 0; dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) + if (!dev) return -EINVAL; if (dev->device_info->asic_family == CHIP_CARRIZO) { @@ -460,12 +460,11 @@ static int kfd_ioctl_dbg_register(struct file *filep, */ pdd = kfd_bind_process_to_device(dev, p); if (IS_ERR(pdd)) { - mutex_unlock(&p->mutex); - mutex_unlock(kfd_get_dbgmgr_mutex()); - return PTR_ERR(pdd); + status = PTR_ERR(pdd); + goto out; } - if (dev->dbgmgr == NULL) { + if (!dev->dbgmgr) { /* In case of a legal call, we have no dbgmgr yet */ create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev); if (create_ok) { @@ -480,6 +479,7 @@ static int kfd_ioctl_dbg_register(struct file *filep, status = -EINVAL; } +out: mutex_unlock(&p->mutex); mutex_unlock(kfd_get_dbgmgr_mutex()); @@ -494,7 +494,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, long status; dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) + if (!dev) return -EINVAL; if (dev->device_info->asic_family == CHIP_CARRIZO) { @@ -505,7 +505,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep, mutex_lock(kfd_get_dbgmgr_mutex()); status = kfd_dbgmgr_unregister(dev->dbgmgr, p); - if (status == 0) { + if (!status) { kfd_dbgmgr_destroy(dev->dbgmgr); dev->dbgmgr = NULL; } @@ -539,7 +539,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info)); dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) + if (!dev) return -EINVAL; if (dev->device_info->asic_family == CHIP_CARRIZO) { @@ -580,8 +580,8 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points; if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) { - kfree(args_buff); - return -EINVAL; + status = -EINVAL; + goto out; } watch_mask_value = (uint64_t) args_buff[args_idx]; @@ -604,8 +604,8 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, } if (args_idx >= args->buf_size_in_bytes - sizeof(args)) { - kfree(args_buff); - return -EINVAL; + status = -EINVAL; + goto out; } /* Currently HSA Event is not supported for DBG */ @@ -617,6 +617,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep, mutex_unlock(kfd_get_dbgmgr_mutex()); +out: kfree(args_buff); return status; @@ -646,7 +647,7 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep, sizeof(wac_info.trapId); dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) + if (!dev) return -EINVAL; if (dev->device_info->asic_family == CHIP_CARRIZO) { @@ -782,8 +783,9 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, "scratch_limit %llX\n", pdd->scratch_limit); args->num_of_nodes++; - } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL && - (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); + + pdd = kfd_get_next_process_device_data(p, pdd); + } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); } mutex_unlock(&p->mutex); @@ -846,9 +848,84 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, return err; } +static int kfd_ioctl_set_scratch_backing_va(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_set_scratch_backing_va_args *args = data; + struct kfd_process_device *pdd; + struct kfd_dev *dev; + long err; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = PTR_ERR(pdd); + goto bind_process_to_device_fail; + } + + pdd->qpd.sh_hidden_private_base = args->va_addr; + + mutex_unlock(&p->mutex); + + if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) + dev->kfd2kgd->set_scratch_backing_va( + dev->kgd, args->va_addr, pdd->qpd.vmid); + + return 0; + +bind_process_to_device_fail: + mutex_unlock(&p->mutex); + return err; +} + +static int kfd_ioctl_get_tile_config(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_tile_config_args *args = data; + struct kfd_dev *dev; + struct tile_config config; + int err = 0; + + dev = kfd_device_by_id(args->gpu_id); + + dev->kfd2kgd->get_tile_config(dev->kgd, &config); + + args->gb_addr_config = config.gb_addr_config; + args->num_banks = config.num_banks; + args->num_ranks = config.num_ranks; + + if (args->num_tile_configs > config.num_tile_configs) + args->num_tile_configs = config.num_tile_configs; + err = copy_to_user((void __user *)args->tile_config_ptr, + config.tile_config_ptr, + args->num_tile_configs * sizeof(uint32_t)); + if (err) { + args->num_tile_configs = 0; + return -EFAULT; + } + + if (args->num_macro_tile_configs > config.num_macro_tile_configs) + args->num_macro_tile_configs = + config.num_macro_tile_configs; + err = copy_to_user((void __user *)args->macro_tile_config_ptr, + config.macro_tile_config_ptr, + args->num_macro_tile_configs * sizeof(uint32_t)); + if (err) { + args->num_macro_tile_configs = 0; + return -EFAULT; + } + + return 0; +} #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ - [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} + [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ + .cmd_drv = 0, .name = #ioctl} /** Ioctl table */ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { @@ -899,6 +976,12 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, kfd_ioctl_dbg_wave_control, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, + kfd_ioctl_set_scratch_backing_va, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, + kfd_ioctl_get_tile_config, 0) }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index d5e19b5fbbfb00d1fbbcd1066da0ac4f12424ec3..0aa021aa0aa19d6c1070388b7d80d276907ac22c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -42,8 +42,6 @@ static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) { - BUG_ON(!dev || !dev->kfd2kgd); - dev->kfd2kgd->address_watch_disable(dev->kgd); } @@ -62,7 +60,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, unsigned int *ib_packet_buff; int status; - BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes); + if (WARN_ON(!size_in_bytes)) + return -EINVAL; kq = dbgdev->kq; @@ -77,8 +76,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, status = kq->ops.acquire_packet_buffer(kq, pq_packets_size_in_bytes / sizeof(uint32_t), &ib_packet_buff); - if (status != 0) { - pr_err("amdkfd: acquire_packet_buffer failed\n"); + if (status) { + pr_err("acquire_packet_buffer failed\n"); return status; } @@ -115,8 +114,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), &mem_obj); - if (status != 0) { - pr_err("amdkfd: Failed to allocate GART memory\n"); + if (status) { + pr_err("Failed to allocate GART memory\n"); kq->ops.rollback_packet(kq); return status; } @@ -168,8 +167,6 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) { - BUG_ON(!dbgdev); - /* * no action is needed in this case, * just make sure diq will not be used @@ -187,14 +184,12 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) struct kernel_queue *kq = NULL; int status; - BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev); - status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, &properties, 0, KFD_QUEUE_TYPE_DIQ, &qid); if (status) { - pr_err("amdkfd: Failed to create DIQ\n"); + pr_err("Failed to create DIQ\n"); return status; } @@ -202,8 +197,8 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) kq = pqm_get_kernel_queue(dbgdev->pqm, qid); - if (kq == NULL) { - pr_err("amdkfd: Error getting DIQ\n"); + if (!kq) { + pr_err("Error getting DIQ\n"); pqm_destroy_queue(dbgdev->pqm, qid); return -EFAULT; } @@ -215,8 +210,6 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) { - BUG_ON(!dbgdev || !dbgdev->dev); - /* disable watch address */ dbgdev_address_watch_disable_nodiq(dbgdev->dev); return 0; @@ -227,8 +220,6 @@ static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) /* todo - disable address watch */ int status; - BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq); - status = pqm_destroy_queue(dbgdev->pqm, dbgdev->kq->queue->properties.queue_id); dbgdev->kq = NULL; @@ -245,14 +236,12 @@ static void dbgdev_address_watch_set_registers( { union ULARGE_INTEGER addr; - BUG_ON(!adw_info || !addrHi || !addrLo || !cntl); - addr.quad_part = 0; addrHi->u32All = 0; addrLo->u32All = 0; cntl->u32All = 0; - if (adw_info->watch_mask != NULL) + if (adw_info->watch_mask) cntl->bitfields.mask = (uint32_t) (adw_info->watch_mask[index] & ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); @@ -279,7 +268,7 @@ static void dbgdev_address_watch_set_registers( } static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, - struct dbg_address_watch_info *adw_info) + struct dbg_address_watch_info *adw_info) { union TCP_WATCH_ADDR_H_BITS addrHi; union TCP_WATCH_ADDR_L_BITS addrLo; @@ -287,13 +276,11 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, struct kfd_process_device *pdd; unsigned int i; - BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); - /* taking the vmid for that process on the safe way using pdd */ pdd = kfd_get_process_device_data(dbgdev->dev, adw_info->process); if (!pdd) { - pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); + pr_err("Failed to get pdd for wave control no DIQ\n"); return -EFAULT; } @@ -303,17 +290,16 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || (adw_info->num_watch_points == 0)) { - pr_err("amdkfd: num_watch_points is invalid\n"); + pr_err("num_watch_points is invalid\n"); return -EINVAL; } - if ((adw_info->watch_mode == NULL) || - (adw_info->watch_address == NULL)) { - pr_err("amdkfd: adw_info fields are not valid\n"); + if (!adw_info->watch_mode || !adw_info->watch_address) { + pr_err("adw_info fields are not valid\n"); return -EINVAL; } - for (i = 0 ; i < adw_info->num_watch_points ; i++) { + for (i = 0; i < adw_info->num_watch_points; i++) { dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, &cntl, i, pdd->qpd.vmid); @@ -348,7 +334,7 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, } static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, - struct dbg_address_watch_info *adw_info) + struct dbg_address_watch_info *adw_info) { struct pm4__set_config_reg *packets_vec; union TCP_WATCH_ADDR_H_BITS addrHi; @@ -363,28 +349,25 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, /* we do not control the vmid in DIQ mode, just a place holder */ unsigned int vmid = 0; - BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); - addrHi.u32All = 0; addrLo.u32All = 0; cntl.u32All = 0; if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || (adw_info->num_watch_points == 0)) { - pr_err("amdkfd: num_watch_points is invalid\n"); + pr_err("num_watch_points is invalid\n"); return -EINVAL; } - if ((NULL == adw_info->watch_mode) || - (NULL == adw_info->watch_address)) { - pr_err("amdkfd: adw_info fields are not valid\n"); + if (!adw_info->watch_mode || !adw_info->watch_address) { + pr_err("adw_info fields are not valid\n"); return -EINVAL; } status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); - if (status != 0) { - pr_err("amdkfd: Failed to allocate GART memory\n"); + if (status) { + pr_err("Failed to allocate GART memory\n"); return status; } @@ -442,8 +425,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, i, ADDRESS_WATCH_REG_CNTL); - aw_reg_add_dword /= sizeof(uint32_t); - packets_vec[0].bitfields2.reg_offset = aw_reg_add_dword - AMD_CONFIG_REG_BASE; @@ -455,8 +436,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, i, ADDRESS_WATCH_REG_ADDR_HI); - aw_reg_add_dword /= sizeof(uint32_t); - packets_vec[1].bitfields2.reg_offset = aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[1].reg_data[0] = addrHi.u32All; @@ -467,8 +446,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, i, ADDRESS_WATCH_REG_ADDR_LO); - aw_reg_add_dword /= sizeof(uint32_t); - packets_vec[2].bitfields2.reg_offset = aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[2].reg_data[0] = addrLo.u32All; @@ -485,8 +462,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, i, ADDRESS_WATCH_REG_CNTL); - aw_reg_add_dword /= sizeof(uint32_t); - packets_vec[3].bitfields2.reg_offset = aw_reg_add_dword - AMD_CONFIG_REG_BASE; packets_vec[3].reg_data[0] = cntl.u32All; @@ -498,8 +473,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, packet_buff_uint, ib_size); - if (status != 0) { - pr_err("amdkfd: Failed to submit IB to DIQ\n"); + if (status) { + pr_err("Failed to submit IB to DIQ\n"); break; } } @@ -518,8 +493,6 @@ static int dbgdev_wave_control_set_registers( union GRBM_GFX_INDEX_BITS reg_gfx_index; struct HsaDbgWaveMsgAMDGen2 *pMsg; - BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index); - reg_sq_cmd.u32All = 0; reg_gfx_index.u32All = 0; pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; @@ -620,18 +593,16 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, struct pm4__set_config_reg *packets_vec; size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; - BUG_ON(!dbgdev || !wac_info); - reg_sq_cmd.u32All = 0; status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, ®_gfx_index); if (status) { - pr_err("amdkfd: Failed to set wave control registers\n"); + pr_err("Failed to set wave control registers\n"); return status; } - /* we do not control the VMID in DIQ,so reset it to a known value */ + /* we do not control the VMID in DIQ, so reset it to a known value */ reg_sq_cmd.bits.vm_id = 0; pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); @@ -667,7 +638,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); if (status != 0) { - pr_err("amdkfd: Failed to allocate GART memory\n"); + pr_err("Failed to allocate GART memory\n"); return status; } @@ -719,8 +690,8 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, packet_buff_uint, ib_size); - if (status != 0) - pr_err("amdkfd: Failed to submit IB to DIQ\n"); + if (status) + pr_err("Failed to submit IB to DIQ\n"); kfd_gtt_sa_free(dbgdev->dev, mem_obj); @@ -735,21 +706,19 @@ static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, union GRBM_GFX_INDEX_BITS reg_gfx_index; struct kfd_process_device *pdd; - BUG_ON(!dbgdev || !dbgdev->dev || !wac_info); - reg_sq_cmd.u32All = 0; /* taking the VMID for that process on the safe way using PDD */ pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); if (!pdd) { - pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); + pr_err("Failed to get pdd for wave control no DIQ\n"); return -EFAULT; } status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, ®_gfx_index); if (status) { - pr_err("amdkfd: Failed to set wave control registers\n"); + pr_err("Failed to set wave control registers\n"); return status; } @@ -818,12 +787,13 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. * ATC_VMID15_PASID_MAPPING - * to check which VMID the current process is mapped to. */ + * to check which VMID the current process is mapped to. + */ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid (dev->kgd, vmid)) { - if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid (dev->kgd, vmid) == p->pasid) { pr_debug("Killing wave fronts of vmid %d and pasid %d\n", vmid, p->pasid); @@ -833,7 +803,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) } if (vmid > last_vmid_to_scan) { - pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid); + pr_err("Didn't find vmid for pasid %d\n", p->pasid); return -EFAULT; } @@ -860,8 +830,6 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, enum DBGDEV_TYPE type) { - BUG_ON(!pdbgdev || !pdev); - pdbgdev->dev = pdev; pdbgdev->kq = NULL; pdbgdev->type = type; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c index 56d6763963429d96f2d715d69ddf82436e649c09..3da25f7bda6ba58ca77d0e15816fb194634eec68 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c @@ -44,8 +44,6 @@ struct mutex *kfd_get_dbgmgr_mutex(void) static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) { - BUG_ON(!pmgr); - kfree(pmgr->dbgdev); pmgr->dbgdev = NULL; @@ -55,7 +53,7 @@ static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) { - if (pmgr != NULL) { + if (pmgr) { kfd_dbgmgr_uninitialize(pmgr); kfree(pmgr); } @@ -66,12 +64,12 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; struct kfd_dbgmgr *new_buff; - BUG_ON(pdev == NULL); - BUG_ON(!pdev->init_complete); + if (WARN_ON(!pdev->init_complete)) + return false; new_buff = kfd_alloc_struct(new_buff); if (!new_buff) { - pr_err("amdkfd: Failed to allocate dbgmgr instance\n"); + pr_err("Failed to allocate dbgmgr instance\n"); return false; } @@ -79,7 +77,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) new_buff->dev = pdev; new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev); if (!new_buff->dbgdev) { - pr_err("amdkfd: Failed to allocate dbgdev instance\n"); + pr_err("Failed to allocate dbgdev instance\n"); kfree(new_buff); return false; } @@ -96,8 +94,6 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) { - BUG_ON(!p || !pmgr || !pmgr->dbgdev); - if (pmgr->pasid != 0) { pr_debug("H/W debugger is already active using pasid %d\n", pmgr->pasid); @@ -118,8 +114,6 @@ long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) { - BUG_ON(!p || !pmgr || !pmgr->dbgdev); - /* Is the requests coming from the already registered process? */ if (pmgr->pasid != p->pasid) { pr_debug("H/W debugger is not registered by calling pasid %d\n", @@ -137,8 +131,6 @@ long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, struct dbg_wave_control_info *wac_info) { - BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info); - /* Is the requests coming from the already registered process? */ if (pmgr->pasid != wac_info->process->pasid) { pr_debug("H/W debugger support was not registered for requester pasid %d\n", @@ -152,9 +144,6 @@ long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, struct dbg_address_watch_info *adw_info) { - BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info); - - /* Is the requests coming from the already registered process? */ if (pmgr->pasid != adw_info->process->pasid) { pr_debug("H/W debugger support was not registered for requester pasid %d\n", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h index 257a745ad0b55ee9cf59545f42c0eb484e27f559..a04a1fe1d0d935c389f9a4796b9467d9101fd0e7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h @@ -30,13 +30,11 @@ #pragma pack(push, 4) enum HSA_DBG_WAVEOP { - HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ - HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ - HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ - HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter - debug mode */ - HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take - a trap */ + HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ + HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ + HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ + HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter dbg mode */ + HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */ HSA_DBG_NUM_WAVEOP = 5, HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF }; @@ -81,15 +79,13 @@ struct HsaDbgWaveMsgAMDGen2 { uint32_t UserData:8; /* user data */ uint32_t ShaderArray:1; /* Shader array */ uint32_t Priv:1; /* Privileged */ - uint32_t Reserved0:4; /* This field is reserved, - should be 0 */ + uint32_t Reserved0:4; /* Reserved, should be 0 */ uint32_t WaveId:4; /* wave id */ uint32_t SIMD:2; /* SIMD id */ uint32_t HSACU:4; /* Compute unit */ uint32_t ShaderEngine:2;/* Shader engine */ uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */ - uint32_t Reserved1:4; /* This field is reserved, - should be 0 */ + uint32_t Reserved1:4; /* Reserved, should be 0 */ } ui32; uint32_t Value; }; @@ -121,20 +117,23 @@ struct HsaDbgWaveMessage { * in the user mode instruction stream. The OS scheduler event is typically * associated and signaled by an interrupt issued by the GPU, but other HSA * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced - * by the KFD by this mechanism, too. */ + * by the KFD by this mechanism, too. + */ /* these are the new definitions for events */ enum HSA_EVENTTYPE { HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */ HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change - (start/stop) */ + * (start/stop) + */ HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */ HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */ HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */ HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */ HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state - (EOP pm4) */ + * (EOP pm4) + */ /* ... */ HSA_EVENTTYPE_MAXID, HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3f95f7cb4019468b1011e0483fa50192d0d45f6a..61fff25b4ce7dadbb789b00fca8b09d40835baa4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -26,7 +26,7 @@ #include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" -#include "kfd_pm4_headers.h" +#include "kfd_pm4_headers_vi.h" #define MQD_SIZE_ALIGNED 768 @@ -98,11 +98,14 @@ static const struct kfd_device_info *lookup_device_info(unsigned short did) for (i = 0; i < ARRAY_SIZE(supported_devices); i++) { if (supported_devices[i].did == did) { - BUG_ON(supported_devices[i].device_info == NULL); + WARN_ON(!supported_devices[i].device_info); return supported_devices[i].device_info; } } + dev_warn(kfd_device, "DID %04x is missing in supported_devices\n", + did); + return NULL; } @@ -114,8 +117,10 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, const struct kfd_device_info *device_info = lookup_device_info(pdev->device); - if (!device_info) + if (!device_info) { + dev_err(kfd_device, "kgd2kfd_probe failed\n"); return NULL; + } kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); if (!kfd) @@ -152,15 +157,16 @@ static bool device_iommu_pasid_init(struct kfd_dev *kfd) } if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { - dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n", + dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0); + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) + != 0); return false; } pasid_limit = min_t(unsigned int, - (unsigned int)1 << kfd->device_info->max_pasid_bits, + (unsigned int)(1 << kfd->device_info->max_pasid_bits), iommu_info.max_pasids); /* * last pasid is used for kernel queues doorbells @@ -211,9 +217,8 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, flags); dev = kfd_device_by_pci_dev(pdev); - BUG_ON(dev == NULL); - - kfd_signal_iommu_event(dev, pasid, address, + if (!WARN_ON(!dev)) + kfd_signal_iommu_event(dev, pasid, address, flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); return AMD_IOMMU_INV_PRI_RSP_INVALID; @@ -234,9 +239,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, * calculate max size of runlist packet. * There can be only 2 packets at once */ - size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) + - max_num_of_queues_per_device * - sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2; + size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) + + max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues) + + sizeof(struct pm4_mes_runlist)) * 2; /* Add size of HIQ & DIQ */ size += KFD_KERNEL_QUEUE_SIZE * 2; @@ -247,42 +252,37 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, if (kfd->kfd2kgd->init_gtt_mem_allocation( kfd->kgd, size, &kfd->gtt_mem, &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){ - dev_err(kfd_device, - "Could not allocate %d bytes for device (%x:%x)\n", - size, kfd->pdev->vendor, kfd->pdev->device); + dev_err(kfd_device, "Could not allocate %d bytes\n", size); goto out; } - dev_info(kfd_device, - "Allocated %d bytes on gart for device(%x:%x)\n", - size, kfd->pdev->vendor, kfd->pdev->device); + dev_info(kfd_device, "Allocated %d bytes on gart\n", size); /* Initialize GTT sa with 512 byte chunk size */ if (kfd_gtt_sa_init(kfd, size, 512) != 0) { - dev_err(kfd_device, - "Error initializing gtt sub-allocator\n"); + dev_err(kfd_device, "Error initializing gtt sub-allocator\n"); goto kfd_gtt_sa_init_error; } - kfd_doorbell_init(kfd); - - if (kfd_topology_add_device(kfd) != 0) { + if (kfd_doorbell_init(kfd)) { dev_err(kfd_device, - "Error adding device (%x:%x) to topology\n", - kfd->pdev->vendor, kfd->pdev->device); + "Error initializing doorbell aperture\n"); + goto kfd_doorbell_error; + } + + if (kfd_topology_add_device(kfd)) { + dev_err(kfd_device, "Error adding device to topology\n"); goto kfd_topology_add_device_error; } if (kfd_interrupt_init(kfd)) { - dev_err(kfd_device, - "Error initializing interrupts for device (%x:%x)\n", - kfd->pdev->vendor, kfd->pdev->device); + dev_err(kfd_device, "Error initializing interrupts\n"); goto kfd_interrupt_error; } if (!device_iommu_pasid_init(kfd)) { dev_err(kfd_device, - "Error initializing iommuv2 for device (%x:%x)\n", + "Error initializing iommuv2 for device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); goto device_iommu_pasid_error; } @@ -292,15 +292,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->dqm = device_queue_manager_init(kfd); if (!kfd->dqm) { - dev_err(kfd_device, - "Error initializing queue manager for device (%x:%x)\n", - kfd->pdev->vendor, kfd->pdev->device); + dev_err(kfd_device, "Error initializing queue manager\n"); goto device_queue_manager_error; } - if (kfd->dqm->ops.start(kfd->dqm) != 0) { + if (kfd->dqm->ops.start(kfd->dqm)) { dev_err(kfd_device, - "Error starting queuen manager for device (%x:%x)\n", + "Error starting queue manager for device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); goto dqm_start_error; } @@ -308,10 +306,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->dbgmgr = NULL; kfd->init_complete = true; - dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor, + dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); - pr_debug("kfd: Starting kfd with the following scheduling policy %d\n", + pr_debug("Starting kfd with the following scheduling policy %d\n", sched_policy); goto out; @@ -325,11 +323,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_interrupt_error: kfd_topology_remove_device(kfd); kfd_topology_add_device_error: + kfd_doorbell_fini(kfd); +kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); dev_err(kfd_device, - "device (%x:%x) NOT added due to errors\n", + "device %x:%x NOT added due to errors\n", kfd->pdev->vendor, kfd->pdev->device); out: return kfd->init_complete; @@ -342,6 +342,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) amd_iommu_free_device(kfd->pdev); kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); + kfd_doorbell_fini(kfd); kfd_gtt_sa_fini(kfd); kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); } @@ -351,8 +352,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) void kgd2kfd_suspend(struct kfd_dev *kfd) { - BUG_ON(kfd == NULL); - if (kfd->init_complete) { kfd->dqm->ops.stop(kfd->dqm); amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); @@ -366,14 +365,15 @@ int kgd2kfd_resume(struct kfd_dev *kfd) unsigned int pasid_limit; int err; - BUG_ON(kfd == NULL); - pasid_limit = kfd_get_pasid_limit(); if (kfd->init_complete) { err = amd_iommu_init_device(kfd->pdev, pasid_limit); - if (err < 0) + if (err < 0) { + dev_err(kfd_device, "failed to initialize iommu\n"); return -ENXIO; + } + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback); amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); @@ -402,26 +402,27 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size) { - unsigned int num_of_bits; + unsigned int num_of_longs; - BUG_ON(!kfd); - BUG_ON(!kfd->gtt_mem); - BUG_ON(buf_size < chunk_size); - BUG_ON(buf_size == 0); - BUG_ON(chunk_size == 0); + if (WARN_ON(buf_size < chunk_size)) + return -EINVAL; + if (WARN_ON(buf_size == 0)) + return -EINVAL; + if (WARN_ON(chunk_size == 0)) + return -EINVAL; kfd->gtt_sa_chunk_size = chunk_size; kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; - num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE; - BUG_ON(num_of_bits == 0); + num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) / + BITS_PER_LONG; - kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL); + kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL); if (!kfd->gtt_sa_bitmap) return -ENOMEM; - pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", + pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); mutex_init(&kfd->gtt_sa_lock); @@ -455,8 +456,6 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, { unsigned int found, start_search, cur_size; - BUG_ON(!kfd); - if (size == 0) return -EINVAL; @@ -467,7 +466,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, if ((*mem_obj) == NULL) return -ENOMEM; - pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size); + pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size); start_search = 0; @@ -479,7 +478,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, kfd->gtt_sa_num_of_chunks, start_search); - pr_debug("kfd: found = %d\n", found); + pr_debug("Found = %d\n", found); /* If there wasn't any free chunk, bail out */ if (found == kfd->gtt_sa_num_of_chunks) @@ -497,12 +496,12 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, found, kfd->gtt_sa_chunk_size); - pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n", + pr_debug("gpu_addr = %p, cpu_addr = %p\n", (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr); /* If we need only one chunk, mark it as allocated and get out */ if (size <= kfd->gtt_sa_chunk_size) { - pr_debug("kfd: single bit\n"); + pr_debug("Single bit\n"); set_bit(found, kfd->gtt_sa_bitmap); goto kfd_gtt_out; } @@ -537,7 +536,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, } while (cur_size > 0); - pr_debug("kfd: range_start = %d, range_end = %d\n", + pr_debug("range_start = %d, range_end = %d\n", (*mem_obj)->range_start, (*mem_obj)->range_end); /* Mark the chunks as allocated */ @@ -551,7 +550,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, return 0; kfd_gtt_no_free_chunk: - pr_debug("kfd: allocation failed with mem_obj = %p\n", mem_obj); + pr_debug("Allocation failed with mem_obj = %p\n", mem_obj); mutex_unlock(&kfd->gtt_sa_lock); kfree(mem_obj); return -ENOMEM; @@ -561,13 +560,11 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) { unsigned int bit; - BUG_ON(!kfd); - /* Act like kfree when trying to free a NULL object */ if (!mem_obj) return 0; - pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n", + pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n", mem_obj, mem_obj->range_start, mem_obj->range_end); mutex_lock(&kfd->gtt_sa_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 602769ced3bd3a94e39640eaeff6b62f06107814..53a66e8216245fb601c7800c8f5d6663228cb767 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -79,20 +79,17 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) unsigned int get_queues_num(struct device_queue_manager *dqm) { - BUG_ON(!dqm || !dqm->dev); return bitmap_weight(dqm->dev->shared_resources.queue_bitmap, KGD_MAX_QUEUES); } unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) { - BUG_ON(!dqm || !dqm->dev); return dqm->dev->shared_resources.num_queue_per_pipe; } unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) { - BUG_ON(!dqm || !dqm->dev); return dqm->dev->shared_resources.num_pipe_per_mec; } @@ -121,7 +118,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, /* Kaveri kfd vmid's starts from vmid 8 */ allocated_vmid = bit + KFD_VMID_START_OFFSET; - pr_debug("kfd: vmid allocation %d\n", allocated_vmid); + pr_debug("vmid allocation %d\n", allocated_vmid); qpd->vmid = allocated_vmid; q->properties.vmid = allocated_vmid; @@ -152,42 +149,38 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, { int retval; - BUG_ON(!dqm || !q || !qpd || !allocated_vmid); - - pr_debug("kfd: In func %s\n", __func__); print_queue(q); mutex_lock(&dqm->lock); if (dqm->total_queue_count >= max_num_of_queues_per_device) { - pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n", + pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); - mutex_unlock(&dqm->lock); - return -EPERM; + retval = -EPERM; + goto out_unlock; } if (list_empty(&qpd->queues_list)) { retval = allocate_vmid(dqm, qpd, q); - if (retval != 0) { - mutex_unlock(&dqm->lock); - return retval; - } + if (retval) + goto out_unlock; } *allocated_vmid = qpd->vmid; q->properties.vmid = qpd->vmid; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) retval = create_compute_queue_nocpsch(dqm, q, qpd); - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) retval = create_sdma_queue_nocpsch(dqm, q, qpd); + else + retval = -EINVAL; - if (retval != 0) { + if (retval) { if (list_empty(&qpd->queues_list)) { deallocate_vmid(dqm, qpd, q); *allocated_vmid = 0; } - mutex_unlock(&dqm->lock); - return retval; + goto out_unlock; } list_add(&q->list, &qpd->queues_list); @@ -205,8 +198,9 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, pr_debug("Total of %d queues are accountable so far\n", dqm->total_queue_count); +out_unlock: mutex_unlock(&dqm->lock); - return 0; + return retval; } static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) @@ -216,7 +210,8 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) set = false; - for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_per_mec(dqm); + for (pipe = dqm->next_pipe_to_allocate, i = 0; + i < get_pipes_per_mec(dqm); pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { if (!is_pipe_enabled(dqm, 0, pipe)) @@ -239,8 +234,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) if (!set) return -EBUSY; - pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n", - __func__, q->pipe, q->queue); + pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); /* horizontal hqd allocation */ dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); @@ -260,36 +254,38 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, int retval; struct mqd_manager *mqd; - BUG_ON(!dqm || !q || !qpd); - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (mqd == NULL) + if (!mqd) return -ENOMEM; retval = allocate_hqd(dqm, q); - if (retval != 0) + if (retval) return retval; retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); - if (retval != 0) { - deallocate_hqd(dqm, q); - return retval; - } + if (retval) + goto out_deallocate_hqd; - pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n", - q->pipe, - q->queue); + pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", + q->pipe, q->queue); - retval = mqd->load_mqd(mqd, q->mqd, q->pipe, - q->queue, (uint32_t __user *) q->properties.write_ptr); - if (retval != 0) { - deallocate_hqd(dqm, q); - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); - return retval; - } + dqm->dev->kfd2kgd->set_scratch_backing_va( + dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); + + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, + q->process->mm); + if (retval) + goto out_uninit_mqd; return 0; + +out_uninit_mqd: + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +out_deallocate_hqd: + deallocate_hqd(dqm, q); + + return retval; } static int destroy_queue_nocpsch(struct device_queue_manager *dqm, @@ -299,12 +295,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, int retval; struct mqd_manager *mqd; - BUG_ON(!dqm || !q || !q->mqd || !qpd); - retval = 0; - pr_debug("kfd: In Func %s\n", __func__); - mutex_lock(&dqm->lock); if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { @@ -323,7 +315,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, dqm->sdma_queue_count--; deallocate_sdma_queue(dqm, q->sdma_id); } else { - pr_debug("q->properties.type is invalid (%d)\n", + pr_debug("q->properties.type %d is invalid\n", q->properties.type); retval = -EINVAL; goto out; @@ -334,7 +326,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, q->pipe, q->queue); - if (retval != 0) + if (retval) goto out; mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); @@ -364,14 +356,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) struct mqd_manager *mqd; bool prev_active = false; - BUG_ON(!dqm || !q || !q->mqd); - mutex_lock(&dqm->lock); mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (mqd == NULL) { - mutex_unlock(&dqm->lock); - return -ENOMEM; + if (!mqd) { + retval = -ENOMEM; + goto out_unlock; } if (q->properties.is_active) @@ -385,12 +375,13 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = mqd->update_mqd(mqd, q->mqd, &q->properties); if ((q->properties.is_active) && (!prev_active)) dqm->queue_count++; - else if ((!q->properties.is_active) && (prev_active)) + else if (!q->properties.is_active && prev_active) dqm->queue_count--; if (sched_policy != KFD_SCHED_POLICY_NO_HWS) retval = execute_queues_cpsch(dqm, false); +out_unlock: mutex_unlock(&dqm->lock); return retval; } @@ -400,15 +391,16 @@ static struct mqd_manager *get_mqd_manager_nocpsch( { struct mqd_manager *mqd; - BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX); + if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) + return NULL; - pr_debug("kfd: In func %s mqd type %d\n", __func__, type); + pr_debug("mqd type %d\n", type); mqd = dqm->mqds[type]; if (!mqd) { mqd = mqd_manager_init(type, dqm->dev); - if (mqd == NULL) - pr_err("kfd: mqd manager is NULL"); + if (!mqd) + pr_err("mqd manager is NULL"); dqm->mqds[type] = mqd; } @@ -421,11 +413,7 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, struct device_process_node *n; int retval; - BUG_ON(!dqm || !qpd); - - pr_debug("kfd: In func %s\n", __func__); - - n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL); + n = kzalloc(sizeof(*n), GFP_KERNEL); if (!n) return -ENOMEM; @@ -449,10 +437,6 @@ static int unregister_process_nocpsch(struct device_queue_manager *dqm, int retval; struct device_process_node *cur, *next; - BUG_ON(!dqm || !qpd); - - pr_debug("In func %s\n", __func__); - pr_debug("qpd->queues_list is %s\n", list_empty(&qpd->queues_list) ? "empty" : "not empty"); @@ -493,51 +477,39 @@ static void init_interrupts(struct device_queue_manager *dqm) { unsigned int i; - BUG_ON(dqm == NULL); - for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) if (is_pipe_enabled(dqm, 0, i)) dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i); } -static int init_scheduler(struct device_queue_manager *dqm) -{ - int retval = 0; - - BUG_ON(!dqm); - - pr_debug("kfd: In %s\n", __func__); - - return retval; -} - static int initialize_nocpsch(struct device_queue_manager *dqm) { - int i; + int pipe, queue; - BUG_ON(!dqm); + pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); - pr_debug("kfd: In func %s num of pipes: %d\n", - __func__, get_pipes_per_mec(dqm)); + dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), + sizeof(unsigned int), GFP_KERNEL); + if (!dqm->allocated_queues) + return -ENOMEM; mutex_init(&dqm->lock); INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->next_pipe_to_allocate = 0; dqm->sdma_queue_count = 0; - dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), - sizeof(unsigned int), GFP_KERNEL); - if (!dqm->allocated_queues) { - mutex_destroy(&dqm->lock); - return -ENOMEM; - } - for (i = 0; i < get_pipes_per_mec(dqm); i++) - dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1; + for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { + int pipe_offset = pipe * get_queues_per_pipe(dqm); + + for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) + if (test_bit(pipe_offset + queue, + dqm->dev->shared_resources.queue_bitmap)) + dqm->allocated_queues[pipe] |= 1 << queue; + } dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1; dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1; - init_scheduler(dqm); return 0; } @@ -545,9 +517,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm) { int i; - BUG_ON(!dqm); - - BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0); + WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0); kfree(dqm->allocated_queues); for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) @@ -604,33 +574,34 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, return -ENOMEM; retval = allocate_sdma_queue(dqm, &q->sdma_id); - if (retval != 0) + if (retval) return retval; q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM; - pr_debug("kfd: sdma id is: %d\n", q->sdma_id); - pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id); - pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id); + pr_debug("SDMA id is: %d\n", q->sdma_id); + pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); + pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); - if (retval != 0) { - deallocate_sdma_queue(dqm, q->sdma_id); - return retval; - } + if (retval) + goto out_deallocate_sdma_queue; - retval = mqd->load_mqd(mqd, q->mqd, 0, - 0, NULL); - if (retval != 0) { - deallocate_sdma_queue(dqm, q->sdma_id); - mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); - return retval; - } + retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); + if (retval) + goto out_uninit_mqd; return 0; + +out_uninit_mqd: + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +out_deallocate_sdma_queue: + deallocate_sdma_queue(dqm, q->sdma_id); + + return retval; } /* @@ -642,10 +613,6 @@ static int set_sched_resources(struct device_queue_manager *dqm) int i, mec; struct scheduling_resources res; - BUG_ON(!dqm); - - pr_debug("kfd: In func %s\n", __func__); - res.vmid_mask = (1 << VMID_PER_DEVICE) - 1; res.vmid_mask <<= KFD_VMID_START_OFFSET; @@ -663,8 +630,9 @@ static int set_sched_resources(struct device_queue_manager *dqm) /* This situation may be hit in the future if a new HW * generation exposes more than 64 queues. If so, the - * definition of res.queue_mask needs updating */ - if (WARN_ON(i > (sizeof(res.queue_mask)*8))) { + * definition of res.queue_mask needs updating + */ + if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { pr_err("Invalid queue enabled by amdgpu: %d\n", i); break; } @@ -674,9 +642,9 @@ static int set_sched_resources(struct device_queue_manager *dqm) res.gws_mask = res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; - pr_debug("kfd: scheduling resources:\n" - " vmid mask: 0x%8X\n" - " queue mask: 0x%8llX\n", + pr_debug("Scheduling resources:\n" + "vmid mask: 0x%8X\n" + "queue mask: 0x%8llX\n", res.vmid_mask, res.queue_mask); return pm_send_set_resources(&dqm->packets, &res); @@ -686,10 +654,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) { int retval; - BUG_ON(!dqm); - - pr_debug("kfd: In func %s num of pipes: %d\n", - __func__, get_pipes_per_mec(dqm)); + pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); mutex_init(&dqm->lock); INIT_LIST_HEAD(&dqm->queues); @@ -697,13 +662,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->sdma_queue_count = 0; dqm->active_runlist = false; retval = dqm->ops_asic_specific.initialize(dqm); - if (retval != 0) - goto fail_init_pipelines; - - return 0; + if (retval) + mutex_destroy(&dqm->lock); -fail_init_pipelines: - mutex_destroy(&dqm->lock); return retval; } @@ -712,25 +673,23 @@ static int start_cpsch(struct device_queue_manager *dqm) struct device_process_node *node; int retval; - BUG_ON(!dqm); - retval = 0; retval = pm_init(&dqm->packets, dqm); - if (retval != 0) + if (retval) goto fail_packet_manager_init; retval = set_sched_resources(dqm); - if (retval != 0) + if (retval) goto fail_set_sched_resources; - pr_debug("kfd: allocating fence memory\n"); + pr_debug("Allocating fence memory\n"); /* allocate fence memory on the gart */ retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), &dqm->fence_mem); - if (retval != 0) + if (retval) goto fail_allocate_vidmem; dqm->fence_addr = dqm->fence_mem->cpu_ptr; @@ -758,8 +717,6 @@ static int stop_cpsch(struct device_queue_manager *dqm) struct device_process_node *node; struct kfd_process_device *pdd; - BUG_ON(!dqm); - destroy_queues_cpsch(dqm, true, true); list_for_each_entry(node, &dqm->queues, list) { @@ -776,13 +733,9 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd) { - BUG_ON(!dqm || !kq || !qpd); - - pr_debug("kfd: In func %s\n", __func__); - mutex_lock(&dqm->lock); if (dqm->total_queue_count >= max_num_of_queues_per_device) { - pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n", + pr_warn("Can't create new kernel queue because %d queues were already created\n", dqm->total_queue_count); mutex_unlock(&dqm->lock); return -EPERM; @@ -809,10 +762,6 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd) { - BUG_ON(!dqm || !kq); - - pr_debug("kfd: In %s\n", __func__); - mutex_lock(&dqm->lock); /* here we actually preempt the DIQ */ destroy_queues_cpsch(dqm, true, false); @@ -844,8 +793,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, int retval; struct mqd_manager *mqd; - BUG_ON(!dqm || !q || !qpd); - retval = 0; if (allocate_vmid) @@ -854,7 +801,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, mutex_lock(&dqm->lock); if (dqm->total_queue_count >= max_num_of_queues_per_device) { - pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n", + pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; goto out; @@ -866,15 +813,15 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); - if (mqd == NULL) { - mutex_unlock(&dqm->lock); - return -ENOMEM; + if (!mqd) { + retval = -ENOMEM; + goto out; } dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd); retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); - if (retval != 0) + if (retval) goto out; list_add(&q->list, &qpd->queues_list); @@ -884,7 +831,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; + dqm->sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. @@ -903,12 +850,11 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, unsigned long timeout) { - BUG_ON(!fence_addr); timeout += jiffies; while (*fence_addr != fence_value) { if (time_after(jiffies, timeout)) { - pr_err("kfd: qcm fence wait loop timeout expired\n"); + pr_err("qcm fence wait loop timeout expired\n"); return -ETIME; } schedule(); @@ -932,8 +878,6 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, enum kfd_preempt_type_filter preempt_type; struct kfd_process_device *pdd; - BUG_ON(!dqm); - retval = 0; if (lock) @@ -941,7 +885,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, if (!dqm->active_runlist) goto out; - pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n", + pr_debug("Before destroying queues, sdma queue count is : %u\n", dqm->sdma_queue_count); if (dqm->sdma_queue_count > 0) { @@ -955,7 +899,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, preempt_type, 0, false, 0); - if (retval != 0) + if (retval) goto out; *dqm->fence_addr = KFD_FENCE_INIT; @@ -964,7 +908,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, /* should be timed out */ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); - if (retval != 0) { + if (retval) { pdd = kfd_get_process_device_data(dqm->dev, kfd_get_process(current)); pdd->reset_wavefronts = true; @@ -983,14 +927,12 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock) { int retval; - BUG_ON(!dqm); - if (lock) mutex_lock(&dqm->lock); retval = destroy_queues_cpsch(dqm, false, false); - if (retval != 0) { - pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption"); + if (retval) { + pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption"); goto out; } @@ -1005,8 +947,8 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock) } retval = pm_send_runlist(&dqm->packets, &dqm->queues); - if (retval != 0) { - pr_err("kfd: failed to execute runlist"); + if (retval) { + pr_err("failed to execute runlist"); goto out; } dqm->active_runlist = true; @@ -1025,8 +967,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd; bool preempt_all_queues; - BUG_ON(!dqm || !qpd || !q); - preempt_all_queues = false; retval = 0; @@ -1098,8 +1038,6 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, { bool retval; - pr_debug("kfd: In func %s\n", __func__); - mutex_lock(&dqm->lock); if (alternate_aperture_size == 0) { @@ -1120,14 +1058,11 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, uint64_t base = (uintptr_t)alternate_aperture_base; uint64_t limit = base + alternate_aperture_size - 1; - if (limit <= base) - goto out; - - if ((base & APE1_FIXED_BITS_MASK) != 0) - goto out; - - if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) + if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || + (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { + retval = false; goto out; + } qpd->sh_mem_ape1_base = base >> 16; qpd->sh_mem_ape1_limit = limit >> 16; @@ -1144,27 +1079,22 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) program_sh_mem_settings(dqm, qpd); - pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", + pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", qpd->sh_mem_config, qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit); - mutex_unlock(&dqm->lock); - return retval; - out: mutex_unlock(&dqm->lock); - return false; + return retval; } struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) { struct device_queue_manager *dqm; - BUG_ON(!dev); + pr_debug("Loading device queue manager\n"); - pr_debug("kfd: loading device queue manager\n"); - - dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL); + dqm = kzalloc(sizeof(*dqm), GFP_KERNEL); if (!dqm) return NULL; @@ -1202,8 +1132,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.set_cache_memory_policy = set_cache_memory_policy; break; default: - BUG(); - break; + pr_err("Invalid scheduling policy %d\n", sched_policy); + goto out_free; } switch (dev->device_info->asic_family) { @@ -1216,18 +1146,16 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) break; } - if (dqm->ops.initialize(dqm) != 0) { - kfree(dqm); - return NULL; - } + if (!dqm->ops.initialize(dqm)) + return dqm; - return dqm; +out_free: + kfree(dqm); + return NULL; } void device_queue_manager_uninit(struct device_queue_manager *dqm) { - BUG_ON(!dqm); - dqm->ops.uninitialize(dqm); kfree(dqm); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 48dc0561b40259350661e4f96fd44df5f60cbb81..72c3cbabc0a7c6b6b591c3a563a7c66677c28b18 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -24,6 +24,7 @@ #include "kfd_device_queue_manager.h" #include "cik_regs.h" #include "oss/oss_2_4_sh_mask.h" +#include "gca/gfx_7_2_sh_mask.h" static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd, @@ -65,7 +66,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) * for LDS/Scratch and GPUVM. */ - BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE || top_address_nybble == 0); return PRIVATE_BASE(top_address_nybble << 12) | @@ -104,8 +105,6 @@ static int register_process_cik(struct device_queue_manager *dqm, struct kfd_process_device *pdd; unsigned int temp; - BUG_ON(!dqm || !qpd); - pdd = qpd_to_pdd(qpd); /* check if sh_mem_config register already configured */ @@ -125,9 +124,10 @@ static int register_process_cik(struct device_queue_manager *dqm, } else { temp = get_sh_mem_bases_nybble_64(pdd); qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; } - pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 7e9cae9d349b5535f973c7825cf356fe48f42d9c..40e9ddd096cdf7294ad633d8bb021b9a5d0c1a1f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -67,7 +67,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) * for LDS/Scratch and GPUVM. */ - BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE || top_address_nybble == 0); return top_address_nybble << 12 | @@ -110,8 +110,6 @@ static int register_process_vi(struct device_queue_manager *dqm, struct kfd_process_device *pdd; unsigned int temp; - BUG_ON(!dqm || !qpd); - pdd = qpd_to_pdd(qpd); /* check if sh_mem_config register already configured */ @@ -137,9 +135,11 @@ static int register_process_vi(struct device_queue_manager *dqm, qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT; + qpd->sh_mem_config |= 1 << + SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; } - pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 453c5d66e5c34abf0e6e3f24bbebcacdf2fcca49..acf4d2a977adf7acce0e00df05bbb211d94eda9c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -59,7 +59,7 @@ static inline size_t doorbell_process_allocation(void) } /* Doorbell calculations for device init. */ -void kfd_doorbell_init(struct kfd_dev *kfd) +int kfd_doorbell_init(struct kfd_dev *kfd) { size_t doorbell_start_offset; size_t doorbell_aperture_size; @@ -95,26 +95,35 @@ void kfd_doorbell_init(struct kfd_dev *kfd) kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, doorbell_process_allocation()); - BUG_ON(!kfd->doorbell_kernel_ptr); + if (!kfd->doorbell_kernel_ptr) + return -ENOMEM; - pr_debug("kfd: doorbell initialization:\n"); - pr_debug("kfd: doorbell base == 0x%08lX\n", + pr_debug("Doorbell initialization:\n"); + pr_debug("doorbell base == 0x%08lX\n", (uintptr_t)kfd->doorbell_base); - pr_debug("kfd: doorbell_id_offset == 0x%08lX\n", + pr_debug("doorbell_id_offset == 0x%08lX\n", kfd->doorbell_id_offset); - pr_debug("kfd: doorbell_process_limit == 0x%08lX\n", + pr_debug("doorbell_process_limit == 0x%08lX\n", doorbell_process_limit); - pr_debug("kfd: doorbell_kernel_offset == 0x%08lX\n", + pr_debug("doorbell_kernel_offset == 0x%08lX\n", (uintptr_t)kfd->doorbell_base); - pr_debug("kfd: doorbell aperture size == 0x%08lX\n", + pr_debug("doorbell aperture size == 0x%08lX\n", kfd->shared_resources.doorbell_aperture_size); - pr_debug("kfd: doorbell kernel address == 0x%08lX\n", + pr_debug("doorbell kernel address == 0x%08lX\n", (uintptr_t)kfd->doorbell_kernel_ptr); + + return 0; +} + +void kfd_doorbell_fini(struct kfd_dev *kfd) +{ + if (kfd->doorbell_kernel_ptr) + iounmap(kfd->doorbell_kernel_ptr); } int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) @@ -131,7 +140,7 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) /* Find kfd device according to gpu id */ dev = kfd_device_by_id(vma->vm_pgoff); - if (dev == NULL) + if (!dev) return -EINVAL; /* Calculate physical address of doorbell */ @@ -142,12 +151,11 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - pr_debug("kfd: mapping doorbell page in %s\n" + pr_debug("Mapping doorbell page\n" " target user address == 0x%08llX\n" " physical address == 0x%08llX\n" " vm_flags == 0x%04lX\n" " size == 0x%04lX\n", - __func__, (unsigned long long) vma->vm_start, address, vma->vm_flags, doorbell_process_allocation()); @@ -166,8 +174,6 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, { u32 inx; - BUG_ON(!kfd || !doorbell_off); - mutex_lock(&kfd->doorbell_mutex); inx = find_first_zero_bit(kfd->doorbell_available_index, KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); @@ -185,7 +191,7 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, *doorbell_off = KERNEL_DOORBELL_PASID * (doorbell_process_allocation() / sizeof(u32)) + inx; - pr_debug("kfd: get kernel queue doorbell\n" + pr_debug("Get kernel queue doorbell\n" " doorbell offset == 0x%08X\n" " kernel address == 0x%08lX\n", *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx)); @@ -197,8 +203,6 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) { unsigned int inx; - BUG_ON(!kfd || !db_addr); - inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); mutex_lock(&kfd->doorbell_mutex); @@ -210,7 +214,7 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value) { if (db) { writel(value, db); - pr_debug("writing %d to doorbell address 0x%p\n", value, db); + pr_debug("Writing %d to doorbell address 0x%p\n", value, db); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index d1ce83d73a877b403b4dc24ecb95ab023d335ecb..5979158c3f7b93e96627af2c03cd814228b0fe01 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -110,7 +110,7 @@ static bool allocate_free_slot(struct kfd_process *process, *out_page = page; *out_slot_index = slot; - pr_debug("allocated event signal slot in page %p, slot %d\n", + pr_debug("Allocated event signal slot in page %p, slot %d\n", page, slot); return true; @@ -155,9 +155,9 @@ static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p) struct signal_page, event_pages)->page_index + 1; - pr_debug("allocated new event signal page at %p, for process %p\n", + pr_debug("Allocated new event signal page at %p, for process %p\n", page, p); - pr_debug("page index is %d\n", page->page_index); + pr_debug("Page index is %d\n", page->page_index); list_add(&page->event_pages, &p->signal_event_pages); @@ -194,7 +194,8 @@ static void release_event_notification_slot(struct signal_page *page, page->free_slots++; /* We don't free signal pages, they are retained by the process - * and reused until it exits. */ + * and reused until it exits. + */ } static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p, @@ -246,7 +247,7 @@ static u32 make_nonsignal_event_id(struct kfd_process *p) for (id = p->next_nonsignal_event_id; id < KFD_LAST_NONSIGNAL_EVENT_ID && - lookup_event_by_id(p, id) != NULL; + lookup_event_by_id(p, id); id++) ; @@ -265,7 +266,7 @@ static u32 make_nonsignal_event_id(struct kfd_process *p) for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; id < KFD_LAST_NONSIGNAL_EVENT_ID && - lookup_event_by_id(p, id) != NULL; + lookup_event_by_id(p, id); id++) ; @@ -291,13 +292,13 @@ static int create_signal_event(struct file *devkfd, struct kfd_event *ev) { if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { - pr_warn("amdkfd: Signal event wasn't created because limit was reached\n"); + pr_warn("Signal event wasn't created because limit was reached\n"); return -ENOMEM; } if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page, &ev->signal_slot_index)) { - pr_warn("amdkfd: Signal event wasn't created because out of kernel memory\n"); + pr_warn("Signal event wasn't created because out of kernel memory\n"); return -ENOMEM; } @@ -309,11 +310,7 @@ static int create_signal_event(struct file *devkfd, ev->event_id = make_signal_event_id(ev->signal_page, ev->signal_slot_index); - pr_debug("signal event number %zu created with id %d, address %p\n", - p->signal_event_count, ev->event_id, - ev->user_signal_address); - - pr_debug("signal event number %zu created with id %d, address %p\n", + pr_debug("Signal event number %zu created with id %d, address %p\n", p->signal_event_count, ev->event_id, ev->user_signal_address); @@ -345,7 +342,7 @@ void kfd_event_init_process(struct kfd_process *p) static void destroy_event(struct kfd_process *p, struct kfd_event *ev) { - if (ev->signal_page != NULL) { + if (ev->signal_page) { release_event_notification_slot(ev->signal_page, ev->signal_slot_index); p->signal_event_count--; @@ -584,7 +581,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, * search faster. */ struct signal_page *page; - unsigned i; + unsigned int i; list_for_each_entry(page, &p->signal_event_pages, event_pages) for (i = 0; i < SLOTS_PER_PAGE; i++) @@ -816,7 +813,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) /* check required size is logical */ if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) != get_order(vma->vm_end - vma->vm_start)) { - pr_err("amdkfd: event page mmap requested illegal size\n"); + pr_err("Event page mmap requested illegal size\n"); return -EINVAL; } @@ -825,7 +822,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) page = lookup_signal_page_by_index(p, page_index); if (!page) { /* Probably KFD bug, but mmap is user-accessible. */ - pr_debug("signal page could not be found for page_index %u\n", + pr_debug("Signal page could not be found for page_index %u\n", page_index); return -EINVAL; } @@ -836,7 +833,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; - pr_debug("mapping signal page\n"); + pr_debug("Mapping signal page\n"); pr_debug(" start user address == 0x%08lx\n", vma->vm_start); pr_debug(" end user address == 0x%08lx\n", vma->vm_end); pr_debug(" pfn == 0x%016lX\n", pfn); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 2b655103ba79655b9600c5b3b8020b0b1c952f5f..c59384bbbc5fc6befa302f99d10b6fdc0b30af94 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -304,7 +304,7 @@ int kfd_init_apertures(struct kfd_process *process) id < NUM_OF_SUPPORTED_GPUS) { pdd = kfd_create_process_device_data(dev, process); - if (pdd == NULL) { + if (!pdd) { pr_err("Failed to create process device data\n"); return -1; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 7f134aa9bfd326626d601e3dc5f4fa516d4a9670..70b3a99cffc22541e73761a1f7b475aefabdf71e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -179,7 +179,7 @@ static void interrupt_wq(struct work_struct *work) bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry) { /* integer and bitwise OR so there is no boolean short-circuiting */ - unsigned wanted = 0; + unsigned int wanted = 0; wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, ih_ring_entry); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index d135cd002a951edc873cfaab44be77fa4db69aec..681b639f51330b78af84854880cae988fd90b252 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -41,11 +41,11 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, int retval; union PM4_MES_TYPE_3_HEADER nop; - BUG_ON(!kq || !dev); - BUG_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ); + if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ)) + return false; - pr_debug("amdkfd: In func %s initializing queue type %d size %d\n", - __func__, KFD_QUEUE_TYPE_HIQ, queue_size); + pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ, + queue_size); memset(&prop, 0, sizeof(prop)); memset(&nop, 0, sizeof(nop)); @@ -63,23 +63,23 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, KFD_MQD_TYPE_HIQ); break; default: - BUG(); - break; + pr_err("Invalid queue type %d\n", type); + return false; } - if (kq->mqd == NULL) + if (!kq->mqd) return false; prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off); - if (prop.doorbell_ptr == NULL) { - pr_err("amdkfd: error init doorbell"); + if (!prop.doorbell_ptr) { + pr_err("Failed to initialize doorbell"); goto err_get_kernel_doorbell; } retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq); if (retval != 0) { - pr_err("amdkfd: error init pq queues size (%d)\n", queue_size); + pr_err("Failed to init pq queues size %d\n", queue_size); goto err_pq_allocate_vidmem; } @@ -87,7 +87,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->pq_gpu_addr = kq->pq->gpu_addr; retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size); - if (retval == false) + if (!retval) goto err_eop_allocate_vidmem; retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel), @@ -139,11 +139,12 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, /* assign HIQ to HQD */ if (type == KFD_QUEUE_TYPE_HIQ) { - pr_debug("assigning hiq to hqd\n"); + pr_debug("Assigning hiq to hqd\n"); kq->queue->pipe = KFD_CIK_HIQ_PIPE; kq->queue->queue = KFD_CIK_HIQ_QUEUE; kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, - kq->queue->queue, NULL); + kq->queue->queue, &kq->queue->properties, + NULL); } else { /* allocate fence for DIQ */ @@ -180,8 +181,6 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, static void uninitialize(struct kernel_queue *kq) { - BUG_ON(!kq); - if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) kq->mqd->destroy_mqd(kq->mqd, NULL, @@ -211,8 +210,6 @@ static int acquire_packet_buffer(struct kernel_queue *kq, uint32_t wptr, rptr; unsigned int *queue_address; - BUG_ON(!kq || !buffer_ptr); - rptr = *kq->rptr_kernel; wptr = *kq->wptr_kernel; queue_address = (unsigned int *)kq->pq_kernel_addr; @@ -252,11 +249,7 @@ static void submit_packet(struct kernel_queue *kq) { #ifdef DEBUG int i; -#endif - - BUG_ON(!kq); -#ifdef DEBUG for (i = *kq->wptr_kernel; i < kq->pending_wptr; i++) { pr_debug("0x%2X ", kq->pq_kernel_addr[i]); if (i % 15 == 0) @@ -272,7 +265,6 @@ static void submit_packet(struct kernel_queue *kq) static void rollback_packet(struct kernel_queue *kq) { - BUG_ON(!kq); kq->pending_wptr = *kq->queue->properties.write_ptr; } @@ -281,9 +273,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, { struct kernel_queue *kq; - BUG_ON(!dev); - - kq = kzalloc(sizeof(struct kernel_queue), GFP_KERNEL); + kq = kzalloc(sizeof(*kq), GFP_KERNEL); if (!kq) return NULL; @@ -304,7 +294,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, } if (!kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) { - pr_err("amdkfd: failed to init kernel queue\n"); + pr_err("Failed to init kernel queue\n"); kfree(kq); return NULL; } @@ -313,32 +303,37 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, void kernel_queue_uninit(struct kernel_queue *kq) { - BUG_ON(!kq); - kq->ops.uninitialize(kq); kfree(kq); } +/* FIXME: Can this test be removed? */ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) { struct kernel_queue *kq; uint32_t *buffer, i; int retval; - BUG_ON(!dev); - - pr_err("amdkfd: starting kernel queue test\n"); + pr_err("Starting kernel queue test\n"); kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ); - BUG_ON(!kq); + if (unlikely(!kq)) { + pr_err(" Failed to initialize HIQ\n"); + pr_err("Kernel queue test failed\n"); + return; + } retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer); - BUG_ON(retval != 0); + if (unlikely(retval != 0)) { + pr_err(" Failed to acquire packet buffer\n"); + pr_err("Kernel queue test failed\n"); + return; + } for (i = 0; i < 5; i++) buffer[i] = kq->nop_packet; kq->ops.submit_packet(kq); - pr_err("amdkfd: ending kernel queue test\n"); + pr_err("Ending kernel queue test\n"); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 850a5623661f83be1dcb2c2108fdf824a2002f94..0d73bea22c450dc63455a86ef708113c411335d7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -61,7 +61,8 @@ MODULE_PARM_DESC(send_sigterm, static int amdkfd_init_completed; -int kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f) +int kgd2kfd_init(unsigned int interface_version, + const struct kgd2kfd_calls **g2f) { if (!amdkfd_init_completed) return -EPROBE_DEFER; @@ -90,7 +91,7 @@ static int __init kfd_module_init(void) /* Verify module parameters */ if ((sched_policy < KFD_SCHED_POLICY_HWS) || (sched_policy > KFD_SCHED_POLICY_NO_HWS)) { - pr_err("kfd: sched_policy has invalid value\n"); + pr_err("sched_policy has invalid value\n"); return -1; } @@ -98,13 +99,13 @@ static int __init kfd_module_init(void) if ((max_num_of_queues_per_device < 1) || (max_num_of_queues_per_device > KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) { - pr_err("kfd: max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n"); + pr_err("max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n"); return -1; } err = kfd_pasid_init(); if (err < 0) - goto err_pasid; + return err; err = kfd_chardev_init(); if (err < 0) @@ -126,7 +127,6 @@ static int __init kfd_module_init(void) kfd_chardev_exit(); err_ioctl: kfd_pasid_exit(); -err_pasid: return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 213a71e0b6c784d2694d57c978e3bfb92cde8b04..1f3a6ba7eed21e624cf80ccfc068dc7f6ef73929 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -67,7 +67,8 @@ struct mqd_manager { int (*load_mqd)(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, - uint32_t __user *wptr); + struct queue_properties *p, + struct mm_struct *mms); int (*update_mqd)(struct mqd_manager *mm, void *mqd, struct queue_properties *q); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 6acc4313363e1fc2485883144c27ff534565edfe..44ffd23348fc4481932a851ea6e1d6a38a9b2989 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -44,10 +44,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, struct cik_mqd *m; int retval; - BUG_ON(!mm || !q || !mqd); - - pr_debug("kfd: In func %s\n", __func__); - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), mqd_mem_obj); @@ -101,7 +97,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_iq_rptr = AQL_ENABLE; *mqd = m; - if (gart_addr != NULL) + if (gart_addr) *gart_addr = addr; retval = mm->update_mqd(mm, m, q); @@ -115,8 +111,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, int retval; struct cik_sdma_rlc_registers *m; - BUG_ON(!mm || !mqd || !mqd_mem_obj); - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_sdma_rlc_registers), mqd_mem_obj); @@ -129,7 +123,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, memset(m, 0, sizeof(struct cik_sdma_rlc_registers)); *mqd = m; - if (gart_addr != NULL) + if (gart_addr) *gart_addr = (*mqd_mem_obj)->gpu_addr; retval = mm->update_mqd(mm, m, q); @@ -140,27 +134,31 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, static void uninit_mqd(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { - BUG_ON(!mm || !mqd); kfd_gtt_sa_free(mm->dev, mqd_mem_obj); } static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { - BUG_ON(!mm || !mqd); kfd_gtt_sa_free(mm->dev, mqd_mem_obj); } static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr) + uint32_t queue_id, struct queue_properties *p, + struct mm_struct *mms) { - return mm->dev->kfd2kgd->hqd_load - (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); + uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); + + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + (uint32_t __user *)p->write_ptr, + wptr_shift, wptr_mask, mms); } static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t __user *wptr) + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) { return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd); } @@ -170,10 +168,6 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, { struct cik_mqd *m; - BUG_ON(!mm || !q || !mqd); - - pr_debug("kfd: In func %s\n", __func__); - m = get_mqd(mqd); m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; @@ -188,21 +182,17 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); - m->cp_hqd_pq_doorbell_control = DOORBELL_EN | - DOORBELL_OFFSET(q->doorbell_off); + m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off); m->cp_hqd_vmid = q->vmid; - if (q->format == KFD_QUEUE_FORMAT_AQL) { + if (q->format == KFD_QUEUE_FORMAT_AQL) m->cp_hqd_pq_control |= NO_UPDATE_RPTR; - } - m->cp_hqd_active = 0; q->is_active = false; if (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0) { - m->cp_hqd_active = 1; q->is_active = true; } @@ -214,8 +204,6 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, { struct cik_sdma_rlc_registers *m; - BUG_ON(!mm || !mqd || !q); - m = get_sdma_mqd(mqd); m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | @@ -254,7 +242,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) { - return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout, + return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout, pipe_id, queue_id); } @@ -301,10 +289,6 @@ static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct cik_mqd *m; int retval; - BUG_ON(!mm || !q || !mqd || !mqd_mem_obj); - - pr_debug("kfd: In func %s\n", __func__); - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), mqd_mem_obj); @@ -359,10 +343,6 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, { struct cik_mqd *m; - BUG_ON(!mm || !q || !mqd); - - pr_debug("kfd: In func %s\n", __func__); - m = get_mqd(mqd); m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | DEFAULT_MIN_AVAIL_SIZE | @@ -400,8 +380,6 @@ struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) { struct cik_sdma_rlc_registers *m; - BUG_ON(!mqd); - m = (struct cik_sdma_rlc_registers *)mqd; return m; @@ -412,12 +390,10 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, { struct mqd_manager *mqd; - BUG_ON(!dev); - BUG_ON(type >= KFD_MQD_TYPE_MAX); - - pr_debug("kfd: In func %s\n", __func__); + if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) + return NULL; - mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); + mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index a9b9882a9a7723de538307b6aa5ad4e2b6e16ff1..73cbfe186dd22ec38e9469e804ef69527cf55eab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -85,7 +85,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_iq_rptr = 1; *mqd = m; - if (gart_addr != NULL) + if (gart_addr) *gart_addr = addr; retval = mm->update_mqd(mm, m, q); @@ -94,10 +94,15 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, - uint32_t __user *wptr) + struct queue_properties *p, struct mm_struct *mms) { - return mm->dev->kfd2kgd->hqd_load - (mm->dev->kgd, mqd, pipe_id, queue_id, wptr); + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); + uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1); + + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + (uint32_t __user *)p->write_ptr, + wptr_shift, wptr_mask, mms); } static int __update_mqd(struct mqd_manager *mm, void *mqd, @@ -106,10 +111,6 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, { struct vi_mqd *m; - BUG_ON(!mm || !q || !mqd); - - pr_debug("kfd: In func %s\n", __func__); - m = get_mqd(mqd); m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT | @@ -117,7 +118,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - pr_debug("kfd: cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); + pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); @@ -126,10 +127,9 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_doorbell_control = - 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT | q->doorbell_off << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; - pr_debug("kfd: cp_hqd_pq_doorbell_control 0x%x\n", + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", m->cp_hqd_pq_doorbell_control); m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT | @@ -139,8 +139,15 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT; - m->cp_hqd_eop_control |= - ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1; + /* + * HW does not clamp this field correctly. Maximum EOP queue size + * is constrained by per-SE EOP done signal count, which is 8-bit. + * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit + * more than (EOP entry count - 1) so a queue size of 0x800 dwords + * is safe, giving a maximum field value of 0xA. + */ + m->cp_hqd_eop_control |= min(0xA, + ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1); m->cp_hqd_eop_base_addr_lo = lower_32_bits(q->eop_ring_buffer_address >> 8); m->cp_hqd_eop_base_addr_hi = @@ -156,12 +163,10 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT; } - m->cp_hqd_active = 0; q->is_active = false; if (q->queue_size > 0 && q->queue_address != 0 && q->queue_percent > 0) { - m->cp_hqd_active = 1; q->is_active = true; } @@ -181,14 +186,13 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { return mm->dev->kfd2kgd->hqd_destroy - (mm->dev->kgd, type, timeout, + (mm->dev->kgd, mqd, type, timeout, pipe_id, queue_id); } static void uninit_mqd(struct mqd_manager *mm, void *mqd, struct kfd_mem_obj *mqd_mem_obj) { - BUG_ON(!mm || !mqd); kfd_gtt_sa_free(mm->dev, mqd_mem_obj); } @@ -238,12 +242,10 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, { struct mqd_manager *mqd; - BUG_ON(!dev); - BUG_ON(type >= KFD_MQD_TYPE_MAX); - - pr_debug("kfd: In func %s\n", __func__); + if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) + return NULL; - mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); + mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 7131998848d76ca9492d0b76925e3f35d0f9f5c1..1d312603de9fbb1624a98fbfac9a3d240490bf4b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -26,7 +26,6 @@ #include "kfd_device_queue_manager.h" #include "kfd_kernel_queue.h" #include "kfd_priv.h" -#include "kfd_pm4_headers.h" #include "kfd_pm4_headers_vi.h" #include "kfd_pm4_opcodes.h" @@ -35,7 +34,8 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, { unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t); - BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes); + WARN((temp * sizeof(uint32_t)) > buffer_size_bytes, + "Runlist IB overflow"); *wptr = temp; } @@ -43,12 +43,12 @@ static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size) { union PM4_MES_TYPE_3_HEADER header; - header.u32all = 0; + header.u32All = 0; header.opcode = opcode; header.count = packet_size/sizeof(uint32_t) - 2; header.type = PM4_TYPE_3; - return header.u32all; + return header.u32All; } static void pm_calc_rlib_size(struct packet_manager *pm, @@ -58,8 +58,6 @@ static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int process_count, queue_count; unsigned int map_queue_size; - BUG_ON(!pm || !rlib_size || !over_subscription); - process_count = pm->dqm->processes_count; queue_count = pm->dqm->queue_count; @@ -67,15 +65,12 @@ static void pm_calc_rlib_size(struct packet_manager *pm, *over_subscription = false; if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) { *over_subscription = true; - pr_debug("kfd: over subscribed runlist\n"); + pr_debug("Over subscribed runlist\n"); } - map_queue_size = - (pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ? - sizeof(struct pm4_mes_map_queues) : - sizeof(struct pm4_map_queues); + map_queue_size = sizeof(struct pm4_mes_map_queues); /* calculate run list ib allocation size */ - *rlib_size = process_count * sizeof(struct pm4_map_process) + + *rlib_size = process_count * sizeof(struct pm4_mes_map_process) + queue_count * map_queue_size; /* @@ -83,9 +78,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm, * when over subscription */ if (*over_subscription) - *rlib_size += sizeof(struct pm4_runlist); + *rlib_size += sizeof(struct pm4_mes_runlist); - pr_debug("kfd: runlist ib size %d\n", *rlib_size); + pr_debug("runlist ib size %d\n", *rlib_size); } static int pm_allocate_runlist_ib(struct packet_manager *pm, @@ -96,17 +91,16 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, { int retval; - BUG_ON(!pm); - BUG_ON(pm->allocated); - BUG_ON(is_over_subscription == NULL); + if (WARN_ON(pm->allocated)) + return -EINVAL; pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, &pm->ib_buffer_obj); - if (retval != 0) { - pr_err("kfd: failed to allocate runlist IB\n"); + if (retval) { + pr_err("Failed to allocate runlist IB\n"); return retval; } @@ -121,15 +115,16 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, uint64_t ib, size_t ib_size_in_dwords, bool chain) { - struct pm4_runlist *packet; + struct pm4_mes_runlist *packet; - BUG_ON(!pm || !buffer || !ib); + if (WARN_ON(!ib)) + return -EFAULT; - packet = (struct pm4_runlist *)buffer; + packet = (struct pm4_mes_runlist *)buffer; - memset(buffer, 0, sizeof(struct pm4_runlist)); - packet->header.u32all = build_pm4_header(IT_RUN_LIST, - sizeof(struct pm4_runlist)); + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); + packet->header.u32All = build_pm4_header(IT_RUN_LIST, + sizeof(struct pm4_mes_runlist)); packet->bitfields4.ib_size = ib_size_in_dwords; packet->bitfields4.chain = chain ? 1 : 0; @@ -144,20 +139,16 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, struct qcm_process_device *qpd) { - struct pm4_map_process *packet; + struct pm4_mes_map_process *packet; struct queue *cur; uint32_t num_queues; - BUG_ON(!pm || !buffer || !qpd); - - packet = (struct pm4_map_process *)buffer; - - pr_debug("kfd: In func %s\n", __func__); + packet = (struct pm4_mes_map_process *)buffer; - memset(buffer, 0, sizeof(struct pm4_map_process)); + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); - packet->header.u32all = build_pm4_header(IT_MAP_PROCESS, - sizeof(struct pm4_map_process)); + packet->header.u32All = build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_mes_map_process)); packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 1; packet->bitfields2.pasid = qpd->pqm->process->pasid; @@ -175,27 +166,26 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; + /* TODO: scratch support */ + packet->sh_hidden_private_base_vmid = 0; + packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); return 0; } -static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer, +static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, struct queue *q, bool is_static) { struct pm4_mes_map_queues *packet; bool use_static = is_static; - BUG_ON(!pm || !buffer || !q); - - pr_debug("kfd: In func %s\n", __func__); - packet = (struct pm4_mes_map_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_map_queues)); + memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); - packet->header.u32all = build_pm4_header(IT_MAP_QUEUES, - sizeof(struct pm4_map_queues)); + packet->header.u32All = build_pm4_header(IT_MAP_QUEUES, + sizeof(struct pm4_mes_map_queues)); packet->bitfields2.alloc_format = alloc_format__mes_map_queues__one_per_pipe_vi; packet->bitfields2.num_queues = 1; @@ -223,10 +213,8 @@ static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer, use_static = false; /* no static queues under SDMA */ break; default: - pr_err("kfd: in %s queue type %d\n", __func__, - q->properties.type); - BUG(); - break; + WARN(1, "queue type %d", q->properties.type); + return -EINVAL; } packet->bitfields3.doorbell_offset = q->properties.doorbell_off; @@ -246,68 +234,6 @@ static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer, return 0; } -static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, - struct queue *q, bool is_static) -{ - struct pm4_map_queues *packet; - bool use_static = is_static; - - BUG_ON(!pm || !buffer || !q); - - pr_debug("kfd: In func %s\n", __func__); - - packet = (struct pm4_map_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_map_queues)); - - packet->header.u32all = build_pm4_header(IT_MAP_QUEUES, - sizeof(struct pm4_map_queues)); - packet->bitfields2.alloc_format = - alloc_format__mes_map_queues__one_per_pipe; - packet->bitfields2.num_queues = 1; - packet->bitfields2.queue_sel = - queue_sel__mes_map_queues__map_to_hws_determined_queue_slots; - - packet->bitfields2.vidmem = (q->properties.is_interop) ? - vidmem__mes_map_queues__uses_video_memory : - vidmem__mes_map_queues__uses_no_video_memory; - - switch (q->properties.type) { - case KFD_QUEUE_TYPE_COMPUTE: - case KFD_QUEUE_TYPE_DIQ: - packet->bitfields2.engine_sel = - engine_sel__mes_map_queues__compute; - break; - case KFD_QUEUE_TYPE_SDMA: - packet->bitfields2.engine_sel = - engine_sel__mes_map_queues__sdma0; - use_static = false; /* no static queues under SDMA */ - break; - default: - BUG(); - break; - } - - packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset = - q->properties.doorbell_off; - - packet->mes_map_queues_ordinals[0].bitfields3.is_static = - (use_static) ? 1 : 0; - - packet->mes_map_queues_ordinals[0].mqd_addr_lo = - lower_32_bits(q->gart_mqd_addr); - - packet->mes_map_queues_ordinals[0].mqd_addr_hi = - upper_32_bits(q->gart_mqd_addr); - - packet->mes_map_queues_ordinals[0].wptr_addr_lo = - lower_32_bits((uint64_t)q->properties.write_ptr); - - packet->mes_map_queues_ordinals[0].wptr_addr_hi = - upper_32_bits((uint64_t)q->properties.write_ptr); - - return 0; -} - static int pm_create_runlist_ib(struct packet_manager *pm, struct list_head *queues, uint64_t *rl_gpu_addr, @@ -322,19 +248,16 @@ static int pm_create_runlist_ib(struct packet_manager *pm, struct kernel_queue *kq; bool is_over_subscription; - BUG_ON(!pm || !queues || !rl_size_bytes || !rl_gpu_addr); - rl_wptr = retval = proccesses_mapped = 0; retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr, &alloc_size_bytes, &is_over_subscription); - if (retval != 0) + if (retval) return retval; *rl_size_bytes = alloc_size_bytes; - pr_debug("kfd: In func %s\n", __func__); - pr_debug("kfd: building runlist ib process count: %d queues count %d\n", + pr_debug("Building runlist ib process count: %d queues count %d\n", pm->dqm->processes_count, pm->dqm->queue_count); /* build the run list ib packet */ @@ -342,42 +265,35 @@ static int pm_create_runlist_ib(struct packet_manager *pm, qpd = cur->qpd; /* build map process packet */ if (proccesses_mapped >= pm->dqm->processes_count) { - pr_debug("kfd: not enough space left in runlist IB\n"); + pr_debug("Not enough space left in runlist IB\n"); pm_release_ib(pm); return -ENOMEM; } retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); - if (retval != 0) + if (retval) return retval; proccesses_mapped++; - inc_wptr(&rl_wptr, sizeof(struct pm4_map_process), + inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process), alloc_size_bytes); list_for_each_entry(kq, &qpd->priv_queue_list, list) { if (!kq->queue->properties.is_active) continue; - pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n", + pr_debug("static_queue, mapping kernel q %d, is debug status %d\n", kq->queue->queue, qpd->is_debug); - if (pm->dqm->dev->device_info->asic_family == - CHIP_CARRIZO) - retval = pm_create_map_queue_vi(pm, - &rl_buffer[rl_wptr], - kq->queue, - qpd->is_debug); - else - retval = pm_create_map_queue(pm, + retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], kq->queue, qpd->is_debug); - if (retval != 0) + if (retval) return retval; inc_wptr(&rl_wptr, - sizeof(struct pm4_map_queues), + sizeof(struct pm4_mes_map_queues), alloc_size_bytes); } @@ -385,51 +301,44 @@ static int pm_create_runlist_ib(struct packet_manager *pm, if (!q->properties.is_active) continue; - pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n", + pr_debug("static_queue, mapping user queue %d, is debug status %d\n", q->queue, qpd->is_debug); - if (pm->dqm->dev->device_info->asic_family == - CHIP_CARRIZO) - retval = pm_create_map_queue_vi(pm, - &rl_buffer[rl_wptr], - q, - qpd->is_debug); - else - retval = pm_create_map_queue(pm, + retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], q, qpd->is_debug); - if (retval != 0) + if (retval) return retval; inc_wptr(&rl_wptr, - sizeof(struct pm4_map_queues), + sizeof(struct pm4_mes_map_queues), alloc_size_bytes); } } - pr_debug("kfd: finished map process and queues to runlist\n"); + pr_debug("Finished map process and queues to runlist\n"); if (is_over_subscription) - pm_create_runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, - alloc_size_bytes / sizeof(uint32_t), true); + retval = pm_create_runlist(pm, &rl_buffer[rl_wptr], + *rl_gpu_addr, + alloc_size_bytes / sizeof(uint32_t), + true); for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++) pr_debug("0x%2X ", rl_buffer[i]); pr_debug("\n"); - return 0; + return retval; } int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) { - BUG_ON(!dqm); - pm->dqm = dqm; mutex_init(&pm->lock); pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ); - if (pm->priv_queue == NULL) { + if (!pm->priv_queue) { mutex_destroy(&pm->lock); return -ENOMEM; } @@ -440,8 +349,6 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) void pm_uninit(struct packet_manager *pm) { - BUG_ON(!pm); - mutex_destroy(&pm->lock); kernel_queue_uninit(pm->priv_queue); } @@ -449,25 +356,22 @@ void pm_uninit(struct packet_manager *pm) int pm_send_set_resources(struct packet_manager *pm, struct scheduling_resources *res) { - struct pm4_set_resources *packet; - - BUG_ON(!pm || !res); - - pr_debug("kfd: In func %s\n", __func__); + struct pm4_mes_set_resources *packet; + int retval = 0; mutex_lock(&pm->lock); pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, sizeof(*packet) / sizeof(uint32_t), - (unsigned int **)&packet); - if (packet == NULL) { - mutex_unlock(&pm->lock); - pr_err("kfd: failed to allocate buffer on kernel queue\n"); - return -ENOMEM; + (unsigned int **)&packet); + if (!packet) { + pr_err("Failed to allocate buffer on kernel queue\n"); + retval = -ENOMEM; + goto out; } - memset(packet, 0, sizeof(struct pm4_set_resources)); - packet->header.u32all = build_pm4_header(IT_SET_RESOURCES, - sizeof(struct pm4_set_resources)); + memset(packet, 0, sizeof(struct pm4_mes_set_resources)); + packet->header.u32All = build_pm4_header(IT_SET_RESOURCES, + sizeof(struct pm4_mes_set_resources)); packet->bitfields2.queue_type = queue_type__mes_set_resources__hsa_interface_queue_hiq; @@ -485,9 +389,10 @@ int pm_send_set_resources(struct packet_manager *pm, pm->priv_queue->ops.submit_packet(pm->priv_queue); +out: mutex_unlock(&pm->lock); - return 0; + return retval; } int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) @@ -497,26 +402,24 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) size_t rl_ib_size, packet_size_dwords; int retval; - BUG_ON(!pm || !dqm_queues); - retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr, &rl_ib_size); - if (retval != 0) + if (retval) goto fail_create_runlist_ib; - pr_debug("kfd: runlist IB address: 0x%llX\n", rl_gpu_ib_addr); + pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); - packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t); + packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t); mutex_lock(&pm->lock); retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, packet_size_dwords, &rl_buffer); - if (retval != 0) + if (retval) goto fail_acquire_packet_buffer; retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr, rl_ib_size / sizeof(uint32_t), false); - if (retval != 0) + if (retval) goto fail_create_runlist; pm->priv_queue->ops.submit_packet(pm->priv_queue); @@ -530,8 +433,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) fail_acquire_packet_buffer: mutex_unlock(&pm->lock); fail_create_runlist_ib: - if (pm->allocated) - pm_release_ib(pm); + pm_release_ib(pm); return retval; } @@ -539,20 +441,21 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, uint32_t fence_value) { int retval; - struct pm4_query_status *packet; + struct pm4_mes_query_status *packet; - BUG_ON(!pm || !fence_address); + if (WARN_ON(!fence_address)) + return -EFAULT; mutex_lock(&pm->lock); retval = pm->priv_queue->ops.acquire_packet_buffer( pm->priv_queue, - sizeof(struct pm4_query_status) / sizeof(uint32_t), + sizeof(struct pm4_mes_query_status) / sizeof(uint32_t), (unsigned int **)&packet); - if (retval != 0) + if (retval) goto fail_acquire_packet_buffer; - packet->header.u32all = build_pm4_header(IT_QUERY_STATUS, - sizeof(struct pm4_query_status)); + packet->header.u32All = build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_mes_query_status)); packet->bitfields2.context_id = 0; packet->bitfields2.interrupt_sel = @@ -566,9 +469,6 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, packet->data_lo = lower_32_bits((uint64_t)fence_value); pm->priv_queue->ops.submit_packet(pm->priv_queue); - mutex_unlock(&pm->lock); - - return 0; fail_acquire_packet_buffer: mutex_unlock(&pm->lock); @@ -582,24 +482,22 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, { int retval; uint32_t *buffer; - struct pm4_unmap_queues *packet; - - BUG_ON(!pm); + struct pm4_mes_unmap_queues *packet; mutex_lock(&pm->lock); retval = pm->priv_queue->ops.acquire_packet_buffer( pm->priv_queue, - sizeof(struct pm4_unmap_queues) / sizeof(uint32_t), + sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t), &buffer); - if (retval != 0) + if (retval) goto err_acquire_packet_buffer; - packet = (struct pm4_unmap_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_unmap_queues)); - pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n", + packet = (struct pm4_mes_unmap_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); + pr_debug("static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n", mode, reset, type); - packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES, - sizeof(struct pm4_unmap_queues)); + packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES, + sizeof(struct pm4_mes_unmap_queues)); switch (type) { case KFD_QUEUE_TYPE_COMPUTE: case KFD_QUEUE_TYPE_DIQ: @@ -611,8 +509,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, engine_sel__mes_unmap_queues__sdma0 + sdma_engine; break; default: - BUG(); - break; + WARN(1, "queue type %d", type); + retval = -EINVAL; + goto err_invalid; } if (reset) @@ -636,16 +535,17 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, break; case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES: packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__perform_request_on_all_active_queues; + queue_sel__mes_unmap_queues__unmap_all_queues; break; case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES: /* in this case, we do not preempt static queues */ packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only; + queue_sel__mes_unmap_queues__unmap_all_non_static_queues; break; default: - BUG(); - break; + WARN(1, "filter %d", mode); + retval = -EINVAL; + goto err_invalid; } pm->priv_queue->ops.submit_packet(pm->priv_queue); @@ -653,6 +553,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, mutex_unlock(&pm->lock); return 0; +err_invalid: + pm->priv_queue->ops.rollback_packet(pm->priv_queue); err_acquire_packet_buffer: mutex_unlock(&pm->lock); return retval; @@ -660,8 +562,6 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, void pm_release_ib(struct packet_manager *pm) { - BUG_ON(!pm); - mutex_lock(&pm->lock); if (pm->allocated) { kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index 6cfe7f1f18cff0d805a75097a5a9f86e2a4fcdfd..1e06de0bc6739f27e129b86e4b35f4d6d8cd8bd6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -32,7 +32,8 @@ int kfd_pasid_init(void) { pasid_limit = KFD_MAX_NUM_OF_PROCESSES; - pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL); + pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), + GFP_KERNEL); if (!pasid_bitmap) return -ENOMEM; @@ -91,6 +92,6 @@ unsigned int kfd_pasid_alloc(void) void kfd_pasid_free(unsigned int pasid) { - BUG_ON(pasid == 0 || pasid >= pasid_limit); - clear_bit(pasid, pasid_bitmap); + if (!WARN_ON(pasid == 0 || pasid >= pasid_limit)) + clear_bit(pasid, pasid_bitmap); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h index 5b393f3e34a9f0a0082c1eb6edd8370cb638e223..e50f73d25de69d44c670e9e66d6f3253f8588688 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h @@ -28,112 +28,19 @@ #define PM4_MES_HEADER_DEFINED union PM4_MES_TYPE_3_HEADER { struct { - uint32_t reserved1:8; /* < reserved */ - uint32_t opcode:8; /* < IT opcode */ - uint32_t count:14; /* < number of DWORDs - 1 - * in the information body. - */ - uint32_t type:2; /* < packet identifier. - * It should be 3 for type 3 packets - */ + /* reserved */ + uint32_t reserved1:8; + /* IT opcode */ + uint32_t opcode:8; + /* number of DWORDs - 1 in the information body */ + uint32_t count:14; + /* packet identifier. It should be 3 for type 3 packets */ + uint32_t type:2; }; uint32_t u32all; }; #endif /* PM4_MES_HEADER_DEFINED */ -/* --------------------MES_SET_RESOURCES-------------------- */ - -#ifndef PM4_MES_SET_RESOURCES_DEFINED -#define PM4_MES_SET_RESOURCES_DEFINED -enum set_resources_queue_type_enum { - queue_type__mes_set_resources__kernel_interface_queue_kiq = 0, - queue_type__mes_set_resources__hsa_interface_queue_hiq = 1, - queue_type__mes_set_resources__hsa_debug_interface_queue = 4 -}; - -struct pm4_set_resources { - union { - union PM4_MES_TYPE_3_HEADER header; /* header */ - uint32_t ordinal1; - }; - - union { - struct { - uint32_t vmid_mask:16; - uint32_t unmap_latency:8; - uint32_t reserved1:5; - enum set_resources_queue_type_enum queue_type:3; - } bitfields2; - uint32_t ordinal2; - }; - - uint32_t queue_mask_lo; - uint32_t queue_mask_hi; - uint32_t gws_mask_lo; - uint32_t gws_mask_hi; - - union { - struct { - uint32_t oac_mask:16; - uint32_t reserved2:16; - } bitfields7; - uint32_t ordinal7; - }; - - union { - struct { - uint32_t gds_heap_base:6; - uint32_t reserved3:5; - uint32_t gds_heap_size:6; - uint32_t reserved4:15; - } bitfields8; - uint32_t ordinal8; - }; - -}; -#endif - -/*--------------------MES_RUN_LIST-------------------- */ - -#ifndef PM4_MES_RUN_LIST_DEFINED -#define PM4_MES_RUN_LIST_DEFINED - -struct pm4_runlist { - union { - union PM4_MES_TYPE_3_HEADER header; /* header */ - uint32_t ordinal1; - }; - - union { - struct { - uint32_t reserved1:2; - uint32_t ib_base_lo:30; - } bitfields2; - uint32_t ordinal2; - }; - - union { - struct { - uint32_t ib_base_hi:16; - uint32_t reserved2:16; - } bitfields3; - uint32_t ordinal3; - }; - - union { - struct { - uint32_t ib_size:20; - uint32_t chain:1; - uint32_t offload_polling:1; - uint32_t reserved3:1; - uint32_t valid:1; - uint32_t reserved4:8; - } bitfields4; - uint32_t ordinal4; - }; - -}; -#endif /*--------------------MES_MAP_PROCESS-------------------- */ @@ -186,217 +93,58 @@ struct pm4_map_process { }; #endif -/*--------------------MES_MAP_QUEUES--------------------*/ - -#ifndef PM4_MES_MAP_QUEUES_DEFINED -#define PM4_MES_MAP_QUEUES_DEFINED -enum map_queues_queue_sel_enum { - queue_sel__mes_map_queues__map_to_specified_queue_slots = 0, - queue_sel__mes_map_queues__map_to_hws_determined_queue_slots = 1, - queue_sel__mes_map_queues__enable_process_queues = 2 -}; +#ifndef PM4_MES_MAP_PROCESS_DEFINED_KV_SCRATCH +#define PM4_MES_MAP_PROCESS_DEFINED_KV_SCRATCH -enum map_queues_vidmem_enum { - vidmem__mes_map_queues__uses_no_video_memory = 0, - vidmem__mes_map_queues__uses_video_memory = 1 -}; - -enum map_queues_alloc_format_enum { - alloc_format__mes_map_queues__one_per_pipe = 0, - alloc_format__mes_map_queues__all_on_one_pipe = 1 -}; - -enum map_queues_engine_sel_enum { - engine_sel__mes_map_queues__compute = 0, - engine_sel__mes_map_queues__sdma0 = 2, - engine_sel__mes_map_queues__sdma1 = 3 -}; - -struct pm4_map_queues { +struct pm4_map_process_scratch_kv { union { - union PM4_MES_TYPE_3_HEADER header; /* header */ - uint32_t ordinal1; - }; - - union { - struct { - uint32_t reserved1:4; - enum map_queues_queue_sel_enum queue_sel:2; - uint32_t reserved2:2; - uint32_t vmid:4; - uint32_t reserved3:4; - enum map_queues_vidmem_enum vidmem:2; - uint32_t reserved4:6; - enum map_queues_alloc_format_enum alloc_format:2; - enum map_queues_engine_sel_enum engine_sel:3; - uint32_t num_queues:3; - } bitfields2; - uint32_t ordinal2; - }; - - struct { - union { - struct { - uint32_t is_static:1; - uint32_t reserved5:1; - uint32_t doorbell_offset:21; - uint32_t reserved6:3; - uint32_t queue:6; - } bitfields3; - uint32_t ordinal3; - }; - - uint32_t mqd_addr_lo; - uint32_t mqd_addr_hi; - uint32_t wptr_addr_lo; - uint32_t wptr_addr_hi; - - } mes_map_queues_ordinals[1]; /* 1..N of these ordinal groups */ - -}; -#endif - -/*--------------------MES_QUERY_STATUS--------------------*/ - -#ifndef PM4_MES_QUERY_STATUS_DEFINED -#define PM4_MES_QUERY_STATUS_DEFINED -enum query_status_interrupt_sel_enum { - interrupt_sel__mes_query_status__completion_status = 0, - interrupt_sel__mes_query_status__process_status = 1, - interrupt_sel__mes_query_status__queue_status = 2 -}; - -enum query_status_command_enum { - command__mes_query_status__interrupt_only = 0, - command__mes_query_status__fence_only_immediate = 1, - command__mes_query_status__fence_only_after_write_ack = 2, - command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3 -}; - -enum query_status_engine_sel_enum { - engine_sel__mes_query_status__compute = 0, - engine_sel__mes_query_status__sdma0_queue = 2, - engine_sel__mes_query_status__sdma1_queue = 3 -}; - -struct pm4_query_status { - union { - union PM4_MES_TYPE_3_HEADER header; /* header */ - uint32_t ordinal1; - }; - - union { - struct { - uint32_t context_id:28; - enum query_status_interrupt_sel_enum interrupt_sel:2; - enum query_status_command_enum command:2; - } bitfields2; - uint32_t ordinal2; + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; }; union { struct { uint32_t pasid:16; - uint32_t reserved1:16; - } bitfields3a; - struct { - uint32_t reserved2:2; - uint32_t doorbell_offset:21; - uint32_t reserved3:3; - enum query_status_engine_sel_enum engine_sel:3; - uint32_t reserved4:3; - } bitfields3b; - uint32_t ordinal3; - }; - - uint32_t addr_lo; - uint32_t addr_hi; - uint32_t data_lo; - uint32_t data_hi; -}; -#endif - -/*--------------------MES_UNMAP_QUEUES--------------------*/ - -#ifndef PM4_MES_UNMAP_QUEUES_DEFINED -#define PM4_MES_UNMAP_QUEUES_DEFINED -enum unmap_queues_action_enum { - action__mes_unmap_queues__preempt_queues = 0, - action__mes_unmap_queues__reset_queues = 1, - action__mes_unmap_queues__disable_process_queues = 2 -}; - -enum unmap_queues_queue_sel_enum { - queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0, - queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1, - queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2, - queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3 -}; - -enum unmap_queues_engine_sel_enum { - engine_sel__mes_unmap_queues__compute = 0, - engine_sel__mes_unmap_queues__sdma0 = 2, - engine_sel__mes_unmap_queues__sdma1 = 3 -}; - -struct pm4_unmap_queues { - union { - union PM4_MES_TYPE_3_HEADER header; /* header */ - uint32_t ordinal1; - }; - - union { - struct { - enum unmap_queues_action_enum action:2; - uint32_t reserved1:2; - enum unmap_queues_queue_sel_enum queue_sel:2; - uint32_t reserved2:20; - enum unmap_queues_engine_sel_enum engine_sel:3; - uint32_t num_queues:3; + uint32_t reserved1:8; + uint32_t diq_enable:1; + uint32_t process_quantum:7; } bitfields2; uint32_t ordinal2; }; union { struct { - uint32_t pasid:16; - uint32_t reserved3:16; - } bitfields3a; - struct { - uint32_t reserved4:2; - uint32_t doorbell_offset0:21; - uint32_t reserved5:9; - } bitfields3b; + uint32_t page_table_base:28; + uint32_t reserved2:4; + } bitfields3; uint32_t ordinal3; }; - union { - struct { - uint32_t reserved6:2; - uint32_t doorbell_offset1:21; - uint32_t reserved7:9; - } bitfields4; - uint32_t ordinal4; - }; - - union { - struct { - uint32_t reserved8:2; - uint32_t doorbell_offset2:21; - uint32_t reserved9:9; - } bitfields5; - uint32_t ordinal5; - }; + uint32_t reserved3; + uint32_t sh_mem_bases; + uint32_t sh_mem_config; + uint32_t sh_mem_ape1_base; + uint32_t sh_mem_ape1_limit; + uint32_t sh_hidden_private_base_vmid; + uint32_t reserved4; + uint32_t reserved5; + uint32_t gds_addr_lo; + uint32_t gds_addr_hi; union { struct { - uint32_t reserved10:2; - uint32_t doorbell_offset3:21; - uint32_t reserved11:9; - } bitfields6; - uint32_t ordinal6; + uint32_t num_gws:6; + uint32_t reserved6:2; + uint32_t num_oac:4; + uint32_t reserved7:4; + uint32_t gds_size:6; + uint32_t num_queues:10; + } bitfields14; + uint32_t ordinal14; }; + uint32_t completion_signal_lo32; +uint32_t completion_signal_hi32; }; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h index 08c721922812bee0b6c7350d3719046fd6691ac4..7c8d9b3577490c909a9361be84cb8959494e8c0f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h @@ -30,10 +30,12 @@ union PM4_MES_TYPE_3_HEADER { struct { uint32_t reserved1 : 8; /* < reserved */ uint32_t opcode : 8; /* < IT opcode */ - uint32_t count : 14;/* < number of DWORDs - 1 in the - information body. */ - uint32_t type : 2; /* < packet identifier. - It should be 3 for type 3 packets */ + uint32_t count : 14;/* < Number of DWORDS - 1 in the + * information body + */ + uint32_t type : 2; /* < packet identifier + * It should be 3 for type 3 packets + */ }; uint32_t u32All; }; @@ -124,9 +126,10 @@ struct pm4_mes_runlist { uint32_t ib_size:20; uint32_t chain:1; uint32_t offload_polling:1; - uint32_t reserved3:1; + uint32_t reserved2:1; uint32_t valid:1; - uint32_t reserved4:8; + uint32_t process_cnt:4; + uint32_t reserved3:4; } bitfields4; uint32_t ordinal4; }; @@ -141,8 +144,8 @@ struct pm4_mes_runlist { struct pm4_mes_map_process { union { - union PM4_MES_TYPE_3_HEADER header; /* header */ - uint32_t ordinal1; + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; }; union { @@ -153,36 +156,48 @@ struct pm4_mes_map_process { uint32_t process_quantum:7; } bitfields2; uint32_t ordinal2; -}; + }; union { struct { uint32_t page_table_base:28; - uint32_t reserved2:4; + uint32_t reserved3:4; } bitfields3; uint32_t ordinal3; }; + uint32_t reserved; + uint32_t sh_mem_bases; + uint32_t sh_mem_config; uint32_t sh_mem_ape1_base; uint32_t sh_mem_ape1_limit; - uint32_t sh_mem_config; + + uint32_t sh_hidden_private_base_vmid; + + uint32_t reserved2; + uint32_t reserved3; + uint32_t gds_addr_lo; uint32_t gds_addr_hi; union { struct { uint32_t num_gws:6; - uint32_t reserved3:2; + uint32_t reserved4:2; uint32_t num_oac:4; - uint32_t reserved4:4; + uint32_t reserved5:4; uint32_t gds_size:6; uint32_t num_queues:10; } bitfields10; uint32_t ordinal10; }; + uint32_t completion_signal_lo; + uint32_t completion_signal_hi; + }; + #endif /*--------------------MES_MAP_QUEUES--------------------*/ @@ -335,7 +350,7 @@ enum mes_unmap_queues_engine_sel_enum { engine_sel__mes_unmap_queues__sdmal = 3 }; -struct PM4_MES_UNMAP_QUEUES { +struct pm4_mes_unmap_queues { union { union PM4_MES_TYPE_3_HEADER header; /* header */ uint32_t ordinal1; @@ -395,4 +410,101 @@ struct PM4_MES_UNMAP_QUEUES { }; #endif +#ifndef PM4_MEC_RELEASE_MEM_DEFINED +#define PM4_MEC_RELEASE_MEM_DEFINED +enum RELEASE_MEM_event_index_enum { + event_index___release_mem__end_of_pipe = 5, + event_index___release_mem__shader_done = 6 +}; + +enum RELEASE_MEM_cache_policy_enum { + cache_policy___release_mem__lru = 0, + cache_policy___release_mem__stream = 1, + cache_policy___release_mem__bypass = 2 +}; + +enum RELEASE_MEM_dst_sel_enum { + dst_sel___release_mem__memory_controller = 0, + dst_sel___release_mem__tc_l2 = 1, + dst_sel___release_mem__queue_write_pointer_register = 2, + dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3 +}; + +enum RELEASE_MEM_int_sel_enum { + int_sel___release_mem__none = 0, + int_sel___release_mem__send_interrupt_only = 1, + int_sel___release_mem__send_interrupt_after_write_confirm = 2, + int_sel___release_mem__send_data_after_write_confirm = 3 +}; + +enum RELEASE_MEM_data_sel_enum { + data_sel___release_mem__none = 0, + data_sel___release_mem__send_32_bit_low = 1, + data_sel___release_mem__send_64_bit_data = 2, + data_sel___release_mem__send_gpu_clock_counter = 3, + data_sel___release_mem__send_cp_perfcounter_hi_lo = 4, + data_sel___release_mem__store_gds_data_to_memory = 5 +}; + +struct pm4_mec_release_mem { + union { + union PM4_MES_TYPE_3_HEADER header; /*header */ + unsigned int ordinal1; + }; + + union { + struct { + unsigned int event_type:6; + unsigned int reserved1:2; + enum RELEASE_MEM_event_index_enum event_index:4; + unsigned int tcl1_vol_action_ena:1; + unsigned int tc_vol_action_ena:1; + unsigned int reserved2:1; + unsigned int tc_wb_action_ena:1; + unsigned int tcl1_action_ena:1; + unsigned int tc_action_ena:1; + unsigned int reserved3:6; + unsigned int atc:1; + enum RELEASE_MEM_cache_policy_enum cache_policy:2; + unsigned int reserved4:5; + } bitfields2; + unsigned int ordinal2; + }; + + union { + struct { + unsigned int reserved5:16; + enum RELEASE_MEM_dst_sel_enum dst_sel:2; + unsigned int reserved6:6; + enum RELEASE_MEM_int_sel_enum int_sel:3; + unsigned int reserved7:2; + enum RELEASE_MEM_data_sel_enum data_sel:3; + } bitfields3; + unsigned int ordinal3; + }; + + union { + struct { + unsigned int reserved8:2; + unsigned int address_lo_32b:30; + } bitfields4; + struct { + unsigned int reserved9:3; + unsigned int address_lo_64b:29; + } bitfields5; + unsigned int ordinal4; + }; + + unsigned int address_hi; + + unsigned int data_lo; + + unsigned int data_hi; +}; +#endif + +enum { + CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014 +}; + #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 4750cabe42521c29338ff54f0de559c9321589d9..b397ec726400c2a52533356363b36bfd275dfb39 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -239,11 +239,6 @@ enum kfd_preempt_type_filter { KFD_PREEMPT_TYPE_FILTER_BY_PASID }; -enum kfd_preempt_type { - KFD_PREEMPT_TYPE_WAVEFRONT, - KFD_PREEMPT_TYPE_WAVEFRONT_RESET -}; - /** * enum kfd_queue_type * @@ -294,13 +289,13 @@ enum kfd_queue_format { * @write_ptr: Defines the number of dwords written to the ring buffer. * * @doorbell_ptr: This field aim is to notify the H/W of new packet written to - * the queue ring buffer. This field should be similar to write_ptr and the user - * should update this field after he updated the write_ptr. + * the queue ring buffer. This field should be similar to write_ptr and the + * user should update this field after he updated the write_ptr. * * @doorbell_off: The doorbell offset in the doorbell pci-bar. * - * @is_interop: Defines if this is a interop queue. Interop queue means that the - * queue can access both graphics and compute resources. + * @is_interop: Defines if this is a interop queue. Interop queue means that + * the queue can access both graphics and compute resources. * * @is_active: Defines if the queue is active or not. * @@ -352,9 +347,10 @@ struct queue_properties { * @properties: The queue properties. * * @mec: Used only in no cp scheduling mode and identifies to micro engine id - * that the queue should be execute on. + * that the queue should be execute on. * - * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id. + * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe + * id. * * @queue: Used only in no cp scheduliong mode and identifies the queue's slot. * @@ -436,6 +432,7 @@ struct qcm_process_device { uint32_t gds_size; uint32_t num_gws; uint32_t num_oac; + uint32_t sh_hidden_private_base; }; /* Data that is per-process-per device. */ @@ -520,8 +517,8 @@ struct kfd_process { struct mutex event_mutex; /* All events in process hashed by ID, linked on kfd_event.events. */ DECLARE_HASHTABLE(events, 4); - struct list_head signal_event_pages; /* struct slot_page_header. - event_pages */ + /* struct slot_page_header.event_pages */ + struct list_head signal_event_pages; u32 next_nonsignal_event_id; size_t signal_event_count; }; @@ -559,8 +556,10 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); /* Process device data iterator */ -struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p); -struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, +struct kfd_process_device *kfd_get_first_process_device_data( + struct kfd_process *p); +struct kfd_process_device *kfd_get_next_process_device_data( + struct kfd_process *p, struct kfd_process_device *pdd); bool kfd_has_process_device_data(struct kfd_process *p); @@ -573,7 +572,8 @@ unsigned int kfd_pasid_alloc(void); void kfd_pasid_free(unsigned int pasid); /* Doorbells */ -void kfd_doorbell_init(struct kfd_dev *kfd); +int kfd_doorbell_init(struct kfd_dev *kfd); +void kfd_doorbell_fini(struct kfd_dev *kfd); int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 035bbc98a63d20f4bed8a15bb2a7527826a2616e..c74cf22a1ed9d5d7d0c7c7e35e31786f6464161c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -79,9 +79,7 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) { struct kfd_process *process; - BUG_ON(!kfd_process_wq); - - if (thread->mm == NULL) + if (!thread->mm) return ERR_PTR(-EINVAL); /* Only the pthreads threading model is supported. */ @@ -101,7 +99,7 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread) /* A prior open of /dev/kfd could have already created the process. */ process = find_process(thread); if (process) - pr_debug("kfd: process already found\n"); + pr_debug("Process already found\n"); if (!process) process = create_process(thread); @@ -117,7 +115,7 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread) { struct kfd_process *process; - if (thread->mm == NULL) + if (!thread->mm) return ERR_PTR(-EINVAL); /* Only the pthreads threading model is supported. */ @@ -202,10 +200,8 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu) struct kfd_process_release_work *work; struct kfd_process *p; - BUG_ON(!kfd_process_wq); - p = container_of(rcu, struct kfd_process, rcu); - BUG_ON(atomic_read(&p->mm->mm_count) <= 0); + WARN_ON(atomic_read(&p->mm->mm_count) <= 0); mmdrop(p->mm); @@ -229,7 +225,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, * mmu_notifier srcu is read locked */ p = container_of(mn, struct kfd_process, mmu_notifier); - BUG_ON(p->mm != mm); + if (WARN_ON(p->mm != mm)) + return; mutex_lock(&kfd_processes_mutex); hash_del_rcu(&p->kfd_processes); @@ -250,7 +247,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, kfd_dbgmgr_destroy(pdd->dev->dbgmgr); if (pdd->reset_wavefronts) { - pr_warn("amdkfd: Resetting all wave fronts\n"); + pr_warn("Resetting all wave fronts\n"); dbgdev_wave_reset_wavefronts(pdd->dev, p); pdd->reset_wavefronts = false; } @@ -407,8 +404,6 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) struct kfd_process *p; struct kfd_process_device *pdd; - BUG_ON(dev == NULL); - /* * Look for the process that matches the pasid. If there is no such * process, we either released it in amdkfd's own notifier, or there @@ -449,14 +444,16 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) mutex_unlock(&p->mutex); } -struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p) +struct kfd_process_device *kfd_get_first_process_device_data( + struct kfd_process *p) { return list_first_entry(&p->per_device_data, struct kfd_process_device, per_device_list); } -struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, +struct kfd_process_device *kfd_get_next_process_device_data( + struct kfd_process *p, struct kfd_process_device *pdd) { if (list_is_last(&pdd->per_device_list, &p->per_device_data)) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 32cdf2b483dbdf9a4f97bf92c8f1c12f292cf093..1cae95e2b13adedfb19760279861fae827d057a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -32,12 +32,9 @@ static inline struct process_queue_node *get_queue_by_qid( { struct process_queue_node *pqn; - BUG_ON(!pqm); - list_for_each_entry(pqn, &pqm->queues, process_queue_list) { - if (pqn->q && pqn->q->properties.queue_id == qid) - return pqn; - if (pqn->kq && pqn->kq->queue->properties.queue_id == qid) + if ((pqn->q && pqn->q->properties.queue_id == qid) || + (pqn->kq && pqn->kq->queue->properties.queue_id == qid)) return pqn; } @@ -49,17 +46,13 @@ static int find_available_queue_slot(struct process_queue_manager *pqm, { unsigned long found; - BUG_ON(!pqm || !qid); - - pr_debug("kfd: in %s\n", __func__); - found = find_first_zero_bit(pqm->queue_slot_bitmap, KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); - pr_debug("kfd: the new slot id %lu\n", found); + pr_debug("The new slot id %lu\n", found); if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { - pr_info("amdkfd: Can not open more queues for process with pasid %d\n", + pr_info("Cannot open more queues for process with pasid %d\n", pqm->process->pasid); return -ENOMEM; } @@ -72,13 +65,11 @@ static int find_available_queue_slot(struct process_queue_manager *pqm, int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) { - BUG_ON(!pqm); - INIT_LIST_HEAD(&pqm->queues); pqm->queue_slot_bitmap = kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_BYTE), GFP_KERNEL); - if (pqm->queue_slot_bitmap == NULL) + if (!pqm->queue_slot_bitmap) return -ENOMEM; pqm->process = p; @@ -90,10 +81,6 @@ void pqm_uninit(struct process_queue_manager *pqm) int retval; struct process_queue_node *pqn, *next; - BUG_ON(!pqm); - - pr_debug("In func %s\n", __func__); - list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { retval = pqm_destroy_queue( pqm, @@ -102,7 +89,7 @@ void pqm_uninit(struct process_queue_manager *pqm) pqn->kq->queue->properties.queue_id); if (retval != 0) { - pr_err("kfd: failed to destroy queue\n"); + pr_err("failed to destroy queue\n"); return; } } @@ -117,8 +104,6 @@ static int create_cp_queue(struct process_queue_manager *pqm, { int retval; - retval = 0; - /* Doorbell initialized in user space*/ q_properties->doorbell_ptr = NULL; @@ -131,16 +116,13 @@ static int create_cp_queue(struct process_queue_manager *pqm, retval = init_queue(q, q_properties); if (retval != 0) - goto err_init_queue; + return retval; (*q)->device = dev; (*q)->process = pqm->process; - pr_debug("kfd: PQM After init queue"); - - return retval; + pr_debug("PQM After init queue"); -err_init_queue: return retval; } @@ -161,8 +143,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, int num_queues = 0; struct queue *cur; - BUG_ON(!pqm || !dev || !properties || !qid); - memset(&q_properties, 0, sizeof(struct queue_properties)); memcpy(&q_properties, properties, sizeof(struct queue_properties)); q = NULL; @@ -185,7 +165,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, list_for_each_entry(cur, &pdd->qpd.queues_list, list) num_queues++; if (num_queues >= dev->device_info->max_no_of_hqd/2) - return (-ENOSPC); + return -ENOSPC; } retval = find_available_queue_slot(pqm, qid); @@ -197,7 +177,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, dev->dqm->ops.register_process(dev->dqm, &pdd->qpd); } - pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL); + pqn = kzalloc(sizeof(*pqn), GFP_KERNEL); if (!pqn) { retval = -ENOMEM; goto err_allocate_pqn; @@ -210,7 +190,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && ((dev->dqm->processes_count >= VMID_PER_DEVICE) || (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { - pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); + pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); retval = -EPERM; goto err_create_queue; } @@ -227,7 +207,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, break; case KFD_QUEUE_TYPE_DIQ: kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ); - if (kq == NULL) { + if (!kq) { retval = -ENOMEM; goto err_create_queue; } @@ -238,22 +218,22 @@ int pqm_create_queue(struct process_queue_manager *pqm, kq, &pdd->qpd); break; default: - BUG(); - break; + WARN(1, "Invalid queue type %d", type); + retval = -EINVAL; } if (retval != 0) { - pr_debug("Error dqm create queue\n"); + pr_err("DQM create queue failed\n"); goto err_create_queue; } - pr_debug("kfd: PQM After DQM create queue\n"); + pr_debug("PQM After DQM create queue\n"); list_add(&pqn->process_queue_list, &pqm->queues); if (q) { *properties = q->properties; - pr_debug("kfd: PQM done creating queue\n"); + pr_debug("PQM done creating queue\n"); print_queue_properties(properties); } @@ -279,14 +259,11 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dqm = NULL; - BUG_ON(!pqm); retval = 0; - pr_debug("kfd: In Func %s\n", __func__); - pqn = get_queue_by_qid(pqm, qid); - if (pqn == NULL) { - pr_err("kfd: queue id does not match any known queue\n"); + if (!pqn) { + pr_err("Queue id does not match any known queue\n"); return -EINVAL; } @@ -295,7 +272,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dev = pqn->kq->dev; if (pqn->q) dev = pqn->q->device; - BUG_ON(!dev); + if (WARN_ON(!dev)) + return -ENODEV; pdd = kfd_get_process_device_data(dev, pqm->process); if (!pdd) { @@ -335,12 +313,9 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, int retval; struct process_queue_node *pqn; - BUG_ON(!pqm); - pqn = get_queue_by_qid(pqm, qid); if (!pqn) { - pr_debug("amdkfd: No queue %d exists for update operation\n", - qid); + pr_debug("No queue %d exists for update operation\n", qid); return -EFAULT; } @@ -363,8 +338,6 @@ struct kernel_queue *pqm_get_kernel_queue( { struct process_queue_node *pqn; - BUG_ON(!pqm); - pqn = get_queue_by_qid(pqm, qid); if (pqn && pqn->kq) return pqn->kq; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 0ab197077f2dc03e18a6e96ea5ddc179c8d0d498..a5315d4f1c954ba6c706ad4a99e1fbd781e0d468 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -65,17 +65,15 @@ void print_queue(struct queue *q) int init_queue(struct queue **q, const struct queue_properties *properties) { - struct queue *tmp; + struct queue *tmp_q; - BUG_ON(!q); - - tmp = kzalloc(sizeof(struct queue), GFP_KERNEL); - if (!tmp) + tmp_q = kzalloc(sizeof(*tmp_q), GFP_KERNEL); + if (!tmp_q) return -ENOMEM; - memcpy(&tmp->properties, properties, sizeof(struct queue_properties)); + memcpy(&tmp_q->properties, properties, sizeof(*properties)); - *q = tmp; + *q = tmp_q; return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 1e50647499593927f6238602ec7ba0efce216eb4..19ce59028d6bdb93844ac1582d237a11202fea73 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -108,9 +108,6 @@ static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size) static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) { - BUG_ON(!dev); - BUG_ON(!cu); - dev->node_props.cpu_cores_count = cu->num_cpu_cores; dev->node_props.cpu_core_id_base = cu->processor_id_low; if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) @@ -123,9 +120,6 @@ static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) { - BUG_ON(!dev); - BUG_ON(!cu); - dev->node_props.simd_id_base = cu->processor_id_low; dev->node_props.simd_count = cu->num_simd_cores; dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; @@ -148,8 +142,6 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu) struct kfd_topology_device *dev; int i = 0; - BUG_ON(!cu); - pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", cu->proximity_domain, cu->hsa_capability); list_for_each_entry(dev, &topology_device_list, list) { @@ -177,8 +169,6 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem) struct kfd_topology_device *dev; int i = 0; - BUG_ON(!mem); - pr_info("Found memory entry in CRAT table with proximity_domain=%d\n", mem->promixity_domain); list_for_each_entry(dev, &topology_device_list, list) { @@ -223,8 +213,6 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache) struct kfd_topology_device *dev; uint32_t id; - BUG_ON(!cache); - id = cache->processor_id_low; pr_info("Found cache entry in CRAT table with processor_id=%d\n", id); @@ -274,8 +262,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink) uint32_t id_from; uint32_t id_to; - BUG_ON(!iolink); - id_from = iolink->proximity_domain_from; id_to = iolink->proximity_domain_to; @@ -323,8 +309,6 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr) struct crat_subtype_iolink *iolink; int ret = 0; - BUG_ON(!sub_type_hdr); - switch (sub_type_hdr->type) { case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY: cu = (struct crat_subtype_computeunit *)sub_type_hdr; @@ -368,8 +352,6 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) struct kfd_cache_properties *cache; struct kfd_iolink_properties *iolink; - BUG_ON(!dev); - list_del(&dev->list); while (dev->mem_props.next != &dev->mem_props) { @@ -416,7 +398,7 @@ static struct kfd_topology_device *kfd_create_topology_device(void) struct kfd_topology_device *dev; dev = kfd_alloc_struct(dev); - if (dev == NULL) { + if (!dev) { pr_err("No memory to allocate a topology device"); return NULL; } @@ -666,7 +648,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.simd_count); if (dev->mem_bank_count < dev->node_props.mem_banks_count) { - pr_info_once("kfd: mem_banks_count truncated from %d to %d\n", + pr_info_once("mem_banks_count truncated from %d to %d\n", dev->node_props.mem_banks_count, dev->mem_bank_count); sysfs_show_32bit_prop(buffer, "mem_banks_count", @@ -763,8 +745,6 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; - BUG_ON(!dev); - if (dev->kobj_iolink) { list_for_each_entry(iolink, &dev->io_link_props, list) if (iolink->kobj) { @@ -819,12 +799,12 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, int ret; uint32_t i; - BUG_ON(!dev); + if (WARN_ON(dev->kobj_node)) + return -EEXIST; /* * Creating the sysfs folders */ - BUG_ON(dev->kobj_node); dev->kobj_node = kfd_alloc_struct(dev->kobj_node); if (!dev->kobj_node) return -ENOMEM; @@ -957,7 +937,7 @@ static int kfd_topology_update_sysfs(void) int ret; pr_info("Creating topology SYSFS entries\n"); - if (sys_props.kobj_topology == NULL) { + if (!sys_props.kobj_topology) { sys_props.kobj_topology = kfd_alloc_struct(sys_props.kobj_topology); if (!sys_props.kobj_topology) @@ -1117,10 +1097,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) struct kfd_topology_device *dev; struct kfd_topology_device *out_dev = NULL; - BUG_ON(!gpu); - list_for_each_entry(dev, &topology_device_list, list) - if (dev->gpu == NULL && dev->node_props.simd_count > 0) { + if (!dev->gpu && (dev->node_props.simd_count > 0)) { dev->gpu = gpu; out_dev = dev; break; @@ -1143,11 +1121,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu) struct kfd_topology_device *dev; int res; - BUG_ON(!gpu); - gpu_id = kfd_generate_gpu_id(gpu); - pr_debug("kfd: Adding new GPU (ID: 0x%x) to topology\n", gpu_id); + pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); down_write(&topology_lock); /* @@ -1170,8 +1146,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * GPU vBIOS */ - /* - * Update the SYSFS tree, since we added another topology device + /* Update the SYSFS tree, since we added another topology + * device */ if (kfd_topology_update_sysfs() < 0) kfd_topology_release_sysfs(); @@ -1190,7 +1166,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE; - pr_info("amdkfd: adding doorbell packet type capability\n"); + pr_info("Adding doorbell packet type capability\n"); } res = 0; @@ -1210,8 +1186,6 @@ int kfd_topology_remove_device(struct kfd_dev *gpu) uint32_t gpu_id; int res = -ENODEV; - BUG_ON(!gpu); - down_write(&topology_lock); list_for_each_entry(dev, &topology_device_list, list) diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 0021a1c63356e8cee086dfc50c0dd5c09329abfa..837296db9628b98a37bdc7d2941aba759adcc405 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1233,6 +1233,69 @@ struct atom_asic_profiling_info_v4_1 uint32_t phyclk2gfxclk_c; }; +struct atom_asic_profiling_info_v4_2 { + struct atom_common_table_header table_header; + uint32_t maxvddc; + uint32_t minvddc; + uint32_t avfs_meannsigma_acontant0; + uint32_t avfs_meannsigma_acontant1; + uint32_t avfs_meannsigma_acontant2; + uint16_t avfs_meannsigma_dc_tol_sigma; + uint16_t avfs_meannsigma_platform_mean; + uint16_t avfs_meannsigma_platform_sigma; + uint32_t gb_vdroop_table_cksoff_a0; + uint32_t gb_vdroop_table_cksoff_a1; + uint32_t gb_vdroop_table_cksoff_a2; + uint32_t gb_vdroop_table_ckson_a0; + uint32_t gb_vdroop_table_ckson_a1; + uint32_t gb_vdroop_table_ckson_a2; + uint32_t avfsgb_fuse_table_cksoff_m1; + uint32_t avfsgb_fuse_table_cksoff_m2; + uint32_t avfsgb_fuse_table_cksoff_b; + uint32_t avfsgb_fuse_table_ckson_m1; + uint32_t avfsgb_fuse_table_ckson_m2; + uint32_t avfsgb_fuse_table_ckson_b; + uint16_t max_voltage_0_25mv; + uint8_t enable_gb_vdroop_table_cksoff; + uint8_t enable_gb_vdroop_table_ckson; + uint8_t enable_gb_fuse_table_cksoff; + uint8_t enable_gb_fuse_table_ckson; + uint16_t psm_age_comfactor; + uint8_t enable_apply_avfs_cksoff_voltage; + uint8_t reserved; + uint32_t dispclk2gfxclk_a; + uint32_t dispclk2gfxclk_b; + uint32_t dispclk2gfxclk_c; + uint32_t pixclk2gfxclk_a; + uint32_t pixclk2gfxclk_b; + uint32_t pixclk2gfxclk_c; + uint32_t dcefclk2gfxclk_a; + uint32_t dcefclk2gfxclk_b; + uint32_t dcefclk2gfxclk_c; + uint32_t phyclk2gfxclk_a; + uint32_t phyclk2gfxclk_b; + uint32_t phyclk2gfxclk_c; + uint32_t acg_gb_vdroop_table_a0; + uint32_t acg_gb_vdroop_table_a1; + uint32_t acg_gb_vdroop_table_a2; + uint32_t acg_avfsgb_fuse_table_m1; + uint32_t acg_avfsgb_fuse_table_m2; + uint32_t acg_avfsgb_fuse_table_b; + uint8_t enable_acg_gb_vdroop_table; + uint8_t enable_acg_gb_fuse_table; + uint32_t acg_dispclk2gfxclk_a; + uint32_t acg_dispclk2gfxclk_b; + uint32_t acg_dispclk2gfxclk_c; + uint32_t acg_pixclk2gfxclk_a; + uint32_t acg_pixclk2gfxclk_b; + uint32_t acg_pixclk2gfxclk_c; + uint32_t acg_dcefclk2gfxclk_a; + uint32_t acg_dcefclk2gfxclk_b; + uint32_t acg_dcefclk2gfxclk_c; + uint32_t acg_phyclk2gfxclk_a; + uint32_t acg_phyclk2gfxclk_b; + uint32_t acg_phyclk2gfxclk_c; +}; /* *************************************************************************** diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h index 0a94f749e3c0a24f5d9b98256f19336d73704a09..0214f63f52fc6a6ac14dd1500c4efdaf2e0fcf14 100644 --- a/drivers/gpu/drm/amd/include/cgs_common.h +++ b/drivers/gpu/drm/amd/include/cgs_common.h @@ -50,6 +50,7 @@ enum cgs_ind_reg { CGS_IND_REG__UVD_CTX, CGS_IND_REG__DIDT, CGS_IND_REG_GC_CAC, + CGS_IND_REG_SE_CAC, CGS_IND_REG__AUDIO_ENDPT }; @@ -406,6 +407,8 @@ typedef int (*cgs_is_virtualization_enabled_t)(void *cgs_device); typedef int (*cgs_enter_safe_mode)(struct cgs_device *cgs_device, bool en); +typedef void (*cgs_lock_grbm_idx)(struct cgs_device *cgs_device, bool lock); + struct cgs_ops { /* memory management calls (similar to KFD interface) */ cgs_alloc_gpu_mem_t alloc_gpu_mem; @@ -441,6 +444,7 @@ struct cgs_ops { cgs_query_system_info query_system_info; cgs_is_virtualization_enabled_t is_virtualization_enabled; cgs_enter_safe_mode enter_safe_mode; + cgs_lock_grbm_idx lock_grbm_idx; }; struct cgs_os_ops; /* To be define in OS-specific CGS header */ @@ -517,4 +521,6 @@ struct cgs_device #define cgs_enter_safe_mode(cgs_device, en) \ CGS_CALL(enter_safe_mode, cgs_device, en) +#define cgs_lock_grbm_idx(cgs_device, lock) \ + CGS_CALL(lock_grbm_idx, cgs_device, lock) #endif /* _CGS_COMMON_H */ diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 36f376677a53322dce8e3e95cc66135457d6ea86..94277cb734d2f6febbeeef143db081a3c78e2f66 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -41,6 +41,11 @@ struct kgd_dev; struct kgd_mem; +enum kfd_preempt_type { + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0, + KFD_PREEMPT_TYPE_WAVEFRONT_RESET, +}; + enum kgd_memory_pool { KGD_POOL_SYSTEM_CACHEABLE = 1, KGD_POOL_SYSTEM_WRITECOMBINE = 2, @@ -82,6 +87,17 @@ struct kgd2kfd_shared_resources { size_t doorbell_start_offset; }; +struct tile_config { + uint32_t *tile_config_ptr; + uint32_t *macro_tile_config_ptr; + uint32_t num_tile_configs; + uint32_t num_macro_tile_configs; + + uint32_t gb_addr_config; + uint32_t num_banks; + uint32_t num_ranks; +}; + /** * struct kfd2kgd_calls * @@ -123,6 +139,11 @@ struct kgd2kfd_shared_resources { * * @get_fw_version: Returns FW versions from the header * + * @set_scratch_backing_va: Sets VA for scratch backing memory of a VMID. + * Only used for no cp scheduling mode + * + * @get_tile_config: Returns GPU-specific tiling mode information + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -153,14 +174,16 @@ struct kfd2kgd_calls { int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr); + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd); bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); - int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type, + int (*hqd_destroy)(struct kgd_dev *kgd, void *mqd, uint32_t reset_type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id); @@ -192,6 +215,9 @@ struct kfd2kgd_calls { uint16_t (*get_fw_version)(struct kgd_dev *kgd, enum kgd_engine_type type); + void (*set_scratch_backing_va)(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); + int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config); }; /** diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c index 0b74da3dca8b5d83e2af51347a4afb5b77e43a40..bc839ff0bdd0425218a729f4838dc073ebfe4d30 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c @@ -1240,13 +1240,18 @@ static int cz_phm_force_dpm_highest(struct pp_hwmgr *hwmgr) { struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend); - if (cz_hwmgr->sclk_dpm.soft_min_clk != - cz_hwmgr->sclk_dpm.soft_max_clk) - smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, - PPSMC_MSG_SetSclkSoftMin, - cz_get_sclk_level(hwmgr, - cz_hwmgr->sclk_dpm.soft_max_clk, - PPSMC_MSG_SetSclkSoftMin)); + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetSclkSoftMin, + cz_get_sclk_level(hwmgr, + cz_hwmgr->sclk_dpm.soft_max_clk, + PPSMC_MSG_SetSclkSoftMin)); + + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetSclkSoftMax, + cz_get_sclk_level(hwmgr, + cz_hwmgr->sclk_dpm.soft_max_clk, + PPSMC_MSG_SetSclkSoftMax)); + return 0; } @@ -1292,17 +1297,55 @@ static int cz_phm_force_dpm_lowest(struct pp_hwmgr *hwmgr) { struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend); - if (cz_hwmgr->sclk_dpm.soft_min_clk != - cz_hwmgr->sclk_dpm.soft_max_clk) { - cz_hwmgr->sclk_dpm.soft_max_clk = - cz_hwmgr->sclk_dpm.soft_min_clk; + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetSclkSoftMax, + cz_get_sclk_level(hwmgr, + cz_hwmgr->sclk_dpm.soft_min_clk, + PPSMC_MSG_SetSclkSoftMax)); - smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetSclkSoftMin, + cz_get_sclk_level(hwmgr, + cz_hwmgr->sclk_dpm.soft_min_clk, + PPSMC_MSG_SetSclkSoftMin)); + + return 0; +} + +static int cz_phm_force_dpm_sclk(struct pp_hwmgr *hwmgr, uint32_t sclk) +{ + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetSclkSoftMin, + cz_get_sclk_level(hwmgr, + sclk, + PPSMC_MSG_SetSclkSoftMin)); + + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, PPSMC_MSG_SetSclkSoftMax, cz_get_sclk_level(hwmgr, - cz_hwmgr->sclk_dpm.soft_max_clk, + sclk, PPSMC_MSG_SetSclkSoftMax)); + return 0; +} + +static int cz_get_profiling_clk(struct pp_hwmgr *hwmgr, uint32_t *sclk) +{ + struct phm_clock_voltage_dependency_table *table = + hwmgr->dyn_state.vddc_dependency_on_sclk; + int32_t tmp_sclk; + int32_t count; + + tmp_sclk = table->entries[table->count-1].clk * 70 / 100; + + for (count = table->count-1; count >= 0; count--) { + if (tmp_sclk >= table->entries[count].clk) { + tmp_sclk = table->entries[count].clk; + *sclk = tmp_sclk; + break; + } } + if (count < 0) + *sclk = table->entries[0].clk; return 0; } @@ -1310,30 +1353,70 @@ static int cz_phm_force_dpm_lowest(struct pp_hwmgr *hwmgr) static int cz_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level) { + uint32_t sclk = 0; int ret = 0; + uint32_t profile_mode_mask = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; + + if (level == hwmgr->dpm_level) + return ret; + + if (!(hwmgr->dpm_level & profile_mode_mask)) { + /* enter profile mode, save current level, disable gfx cg*/ + if (level & profile_mode_mask) { + hwmgr->saved_dpm_level = hwmgr->dpm_level; + cgs_set_clockgating_state(hwmgr->device, + AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_UNGATE); + } + } else { + /* exit profile mode, restore level, enable gfx cg*/ + if (!(level & profile_mode_mask)) { + if (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT) + level = hwmgr->saved_dpm_level; + cgs_set_clockgating_state(hwmgr->device, + AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_GATE); + } + } switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: + case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: ret = cz_phm_force_dpm_highest(hwmgr); if (ret) return ret; + hwmgr->dpm_level = level; break; case AMD_DPM_FORCED_LEVEL_LOW: + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: ret = cz_phm_force_dpm_lowest(hwmgr); if (ret) return ret; + hwmgr->dpm_level = level; break; case AMD_DPM_FORCED_LEVEL_AUTO: ret = cz_phm_unforce_dpm_levels(hwmgr); if (ret) return ret; + hwmgr->dpm_level = level; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + ret = cz_get_profiling_clk(hwmgr, &sclk); + if (ret) + return ret; + hwmgr->dpm_level = level; + cz_phm_force_dpm_sclk(hwmgr, sclk); + break; + case AMD_DPM_FORCED_LEVEL_MANUAL: + hwmgr->dpm_level = level; break; + case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: default: break; } - hwmgr->dpm_level = level; - return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index d025653c78235fabc68413a605475aed2fe3aa9f..9547f265a8bb86d11e1edd42637eda55adb6c8d3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -557,9 +557,8 @@ uint16_t phm_find_closest_vddci(struct pp_atomctrl_voltage_table *vddci_table, u return vddci_table->entries[i].value; } - PP_ASSERT_WITH_CODE(false, - "VDDCI is larger than max VDDCI in VDDCI Voltage Table!", - return vddci_table->entries[i-1].value); + pr_debug("vddci is larger than max value in vddci_table\n"); + return vddci_table->entries[i-1].value; } int phm_find_boot_level(void *table, @@ -583,26 +582,26 @@ int phm_get_sclk_for_voltage_evv(struct pp_hwmgr *hwmgr, phm_ppt_v1_voltage_lookup_table *lookup_table, uint16_t virtual_voltage_id, int32_t *sclk) { - uint8_t entryId; - uint8_t voltageId; + uint8_t entry_id; + uint8_t voltage_id; struct phm_ppt_v1_information *table_info = (struct phm_ppt_v1_information *)(hwmgr->pptable); PP_ASSERT_WITH_CODE(lookup_table->count != 0, "Lookup table is empty", return -EINVAL); /* search for leakage voltage ID 0xff01 ~ 0xff08 and sckl */ - for (entryId = 0; entryId < table_info->vdd_dep_on_sclk->count; entryId++) { - voltageId = table_info->vdd_dep_on_sclk->entries[entryId].vddInd; - if (lookup_table->entries[voltageId].us_vdd == virtual_voltage_id) + for (entry_id = 0; entry_id < table_info->vdd_dep_on_sclk->count; entry_id++) { + voltage_id = table_info->vdd_dep_on_sclk->entries[entry_id].vddInd; + if (lookup_table->entries[voltage_id].us_vdd == virtual_voltage_id) break; } - PP_ASSERT_WITH_CODE(entryId < table_info->vdd_dep_on_sclk->count, - "Can't find requested voltage id in vdd_dep_on_sclk table!", - return -EINVAL; - ); + if (entry_id >= table_info->vdd_dep_on_sclk->count) { + pr_debug("Can't find requested voltage id in vdd_dep_on_sclk table\n"); + return -EINVAL; + } - *sclk = table_info->vdd_dep_on_sclk->entries[entryId].clk; + *sclk = table_info->vdd_dep_on_sclk->entries[entry_id].clk; return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c index 720d5006ff62842efbf46bdc47ea03bba6ad5bb1..c062844b15f3c076f3490e1f4a017340e91bc3b3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c @@ -142,7 +142,7 @@ int pp_atomfwctrl_get_voltage_table_v4(struct pp_hwmgr *hwmgr, } } else if (voltage_mode == VOLTAGE_OBJ_SVID2) { voltage_table->psi1_enable = - voltage_object->svid2_voltage_obj.loadline_psi1 & 0x1; + (voltage_object->svid2_voltage_obj.loadline_psi1 & 0x20) >> 5; voltage_table->psi0_enable = voltage_object->svid2_voltage_obj.psi0_enable & 0x1; voltage_table->max_vid_step = @@ -276,7 +276,10 @@ int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atomfwctrl_avfs_parameters *param) { uint16_t idx; + uint8_t format_revision, content_revision; + struct atom_asic_profiling_info_v4_1 *profile; + struct atom_asic_profiling_info_v4_2 *profile_v4_2; idx = GetIndexIntoMasterDataTable(asic_profiling_info); profile = (struct atom_asic_profiling_info_v4_1 *) @@ -286,76 +289,172 @@ int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr, if (!profile) return -1; - param->ulMaxVddc = le32_to_cpu(profile->maxvddc); - param->ulMinVddc = le32_to_cpu(profile->minvddc); - param->ulMeanNsigmaAcontant0 = - le32_to_cpu(profile->avfs_meannsigma_acontant0); - param->ulMeanNsigmaAcontant1 = - le32_to_cpu(profile->avfs_meannsigma_acontant1); - param->ulMeanNsigmaAcontant2 = - le32_to_cpu(profile->avfs_meannsigma_acontant2); - param->usMeanNsigmaDcTolSigma = - le16_to_cpu(profile->avfs_meannsigma_dc_tol_sigma); - param->usMeanNsigmaPlatformMean = - le16_to_cpu(profile->avfs_meannsigma_platform_mean); - param->usMeanNsigmaPlatformSigma = - le16_to_cpu(profile->avfs_meannsigma_platform_sigma); - param->ulGbVdroopTableCksoffA0 = - le32_to_cpu(profile->gb_vdroop_table_cksoff_a0); - param->ulGbVdroopTableCksoffA1 = - le32_to_cpu(profile->gb_vdroop_table_cksoff_a1); - param->ulGbVdroopTableCksoffA2 = - le32_to_cpu(profile->gb_vdroop_table_cksoff_a2); - param->ulGbVdroopTableCksonA0 = - le32_to_cpu(profile->gb_vdroop_table_ckson_a0); - param->ulGbVdroopTableCksonA1 = - le32_to_cpu(profile->gb_vdroop_table_ckson_a1); - param->ulGbVdroopTableCksonA2 = - le32_to_cpu(profile->gb_vdroop_table_ckson_a2); - param->ulGbFuseTableCksoffM1 = - le32_to_cpu(profile->avfsgb_fuse_table_cksoff_m1); - param->ulGbFuseTableCksoffM2 = - le32_to_cpu(profile->avfsgb_fuse_table_cksoff_m2); - param->ulGbFuseTableCksoffB = - le32_to_cpu(profile->avfsgb_fuse_table_cksoff_b); - param->ulGbFuseTableCksonM1 = - le32_to_cpu(profile->avfsgb_fuse_table_ckson_m1); - param->ulGbFuseTableCksonM2 = - le32_to_cpu(profile->avfsgb_fuse_table_ckson_m2); - param->ulGbFuseTableCksonB = - le32_to_cpu(profile->avfsgb_fuse_table_ckson_b); - - param->ucEnableGbVdroopTableCkson = - profile->enable_gb_vdroop_table_ckson; - param->ucEnableGbFuseTableCkson = - profile->enable_gb_fuse_table_ckson; - param->usPsmAgeComfactor = - le16_to_cpu(profile->psm_age_comfactor); - - param->ulDispclk2GfxclkM1 = - le32_to_cpu(profile->dispclk2gfxclk_a); - param->ulDispclk2GfxclkM2 = - le32_to_cpu(profile->dispclk2gfxclk_b); - param->ulDispclk2GfxclkB = - le32_to_cpu(profile->dispclk2gfxclk_c); - param->ulDcefclk2GfxclkM1 = - le32_to_cpu(profile->dcefclk2gfxclk_a); - param->ulDcefclk2GfxclkM2 = - le32_to_cpu(profile->dcefclk2gfxclk_b); - param->ulDcefclk2GfxclkB = - le32_to_cpu(profile->dcefclk2gfxclk_c); - param->ulPixelclk2GfxclkM1 = - le32_to_cpu(profile->pixclk2gfxclk_a); - param->ulPixelclk2GfxclkM2 = - le32_to_cpu(profile->pixclk2gfxclk_b); - param->ulPixelclk2GfxclkB = - le32_to_cpu(profile->pixclk2gfxclk_c); - param->ulPhyclk2GfxclkM1 = - le32_to_cpu(profile->phyclk2gfxclk_a); - param->ulPhyclk2GfxclkM2 = - le32_to_cpu(profile->phyclk2gfxclk_b); - param->ulPhyclk2GfxclkB = - le32_to_cpu(profile->phyclk2gfxclk_c); + format_revision = ((struct atom_common_table_header *)profile)->format_revision; + content_revision = ((struct atom_common_table_header *)profile)->content_revision; + + if (format_revision == 4 && content_revision == 1) { + param->ulMaxVddc = le32_to_cpu(profile->maxvddc); + param->ulMinVddc = le32_to_cpu(profile->minvddc); + param->ulMeanNsigmaAcontant0 = + le32_to_cpu(profile->avfs_meannsigma_acontant0); + param->ulMeanNsigmaAcontant1 = + le32_to_cpu(profile->avfs_meannsigma_acontant1); + param->ulMeanNsigmaAcontant2 = + le32_to_cpu(profile->avfs_meannsigma_acontant2); + param->usMeanNsigmaDcTolSigma = + le16_to_cpu(profile->avfs_meannsigma_dc_tol_sigma); + param->usMeanNsigmaPlatformMean = + le16_to_cpu(profile->avfs_meannsigma_platform_mean); + param->usMeanNsigmaPlatformSigma = + le16_to_cpu(profile->avfs_meannsigma_platform_sigma); + param->ulGbVdroopTableCksoffA0 = + le32_to_cpu(profile->gb_vdroop_table_cksoff_a0); + param->ulGbVdroopTableCksoffA1 = + le32_to_cpu(profile->gb_vdroop_table_cksoff_a1); + param->ulGbVdroopTableCksoffA2 = + le32_to_cpu(profile->gb_vdroop_table_cksoff_a2); + param->ulGbVdroopTableCksonA0 = + le32_to_cpu(profile->gb_vdroop_table_ckson_a0); + param->ulGbVdroopTableCksonA1 = + le32_to_cpu(profile->gb_vdroop_table_ckson_a1); + param->ulGbVdroopTableCksonA2 = + le32_to_cpu(profile->gb_vdroop_table_ckson_a2); + param->ulGbFuseTableCksoffM1 = + le32_to_cpu(profile->avfsgb_fuse_table_cksoff_m1); + param->ulGbFuseTableCksoffM2 = + le32_to_cpu(profile->avfsgb_fuse_table_cksoff_m2); + param->ulGbFuseTableCksoffB = + le32_to_cpu(profile->avfsgb_fuse_table_cksoff_b); + param->ulGbFuseTableCksonM1 = + le32_to_cpu(profile->avfsgb_fuse_table_ckson_m1); + param->ulGbFuseTableCksonM2 = + le32_to_cpu(profile->avfsgb_fuse_table_ckson_m2); + param->ulGbFuseTableCksonB = + le32_to_cpu(profile->avfsgb_fuse_table_ckson_b); + + param->ucEnableGbVdroopTableCkson = + profile->enable_gb_vdroop_table_ckson; + param->ucEnableGbFuseTableCkson = + profile->enable_gb_fuse_table_ckson; + param->usPsmAgeComfactor = + le16_to_cpu(profile->psm_age_comfactor); + + param->ulDispclk2GfxclkM1 = + le32_to_cpu(profile->dispclk2gfxclk_a); + param->ulDispclk2GfxclkM2 = + le32_to_cpu(profile->dispclk2gfxclk_b); + param->ulDispclk2GfxclkB = + le32_to_cpu(profile->dispclk2gfxclk_c); + param->ulDcefclk2GfxclkM1 = + le32_to_cpu(profile->dcefclk2gfxclk_a); + param->ulDcefclk2GfxclkM2 = + le32_to_cpu(profile->dcefclk2gfxclk_b); + param->ulDcefclk2GfxclkB = + le32_to_cpu(profile->dcefclk2gfxclk_c); + param->ulPixelclk2GfxclkM1 = + le32_to_cpu(profile->pixclk2gfxclk_a); + param->ulPixelclk2GfxclkM2 = + le32_to_cpu(profile->pixclk2gfxclk_b); + param->ulPixelclk2GfxclkB = + le32_to_cpu(profile->pixclk2gfxclk_c); + param->ulPhyclk2GfxclkM1 = + le32_to_cpu(profile->phyclk2gfxclk_a); + param->ulPhyclk2GfxclkM2 = + le32_to_cpu(profile->phyclk2gfxclk_b); + param->ulPhyclk2GfxclkB = + le32_to_cpu(profile->phyclk2gfxclk_c); + param->ulAcgGbVdroopTableA0 = 0; + param->ulAcgGbVdroopTableA1 = 0; + param->ulAcgGbVdroopTableA2 = 0; + param->ulAcgGbFuseTableM1 = 0; + param->ulAcgGbFuseTableM2 = 0; + param->ulAcgGbFuseTableB = 0; + param->ucAcgEnableGbVdroopTable = 0; + param->ucAcgEnableGbFuseTable = 0; + } else if (format_revision == 4 && content_revision == 2) { + profile_v4_2 = (struct atom_asic_profiling_info_v4_2 *)profile; + param->ulMaxVddc = le32_to_cpu(profile_v4_2->maxvddc); + param->ulMinVddc = le32_to_cpu(profile_v4_2->minvddc); + param->ulMeanNsigmaAcontant0 = + le32_to_cpu(profile_v4_2->avfs_meannsigma_acontant0); + param->ulMeanNsigmaAcontant1 = + le32_to_cpu(profile_v4_2->avfs_meannsigma_acontant1); + param->ulMeanNsigmaAcontant2 = + le32_to_cpu(profile_v4_2->avfs_meannsigma_acontant2); + param->usMeanNsigmaDcTolSigma = + le16_to_cpu(profile_v4_2->avfs_meannsigma_dc_tol_sigma); + param->usMeanNsigmaPlatformMean = + le16_to_cpu(profile_v4_2->avfs_meannsigma_platform_mean); + param->usMeanNsigmaPlatformSigma = + le16_to_cpu(profile_v4_2->avfs_meannsigma_platform_sigma); + param->ulGbVdroopTableCksoffA0 = + le32_to_cpu(profile_v4_2->gb_vdroop_table_cksoff_a0); + param->ulGbVdroopTableCksoffA1 = + le32_to_cpu(profile_v4_2->gb_vdroop_table_cksoff_a1); + param->ulGbVdroopTableCksoffA2 = + le32_to_cpu(profile_v4_2->gb_vdroop_table_cksoff_a2); + param->ulGbVdroopTableCksonA0 = + le32_to_cpu(profile_v4_2->gb_vdroop_table_ckson_a0); + param->ulGbVdroopTableCksonA1 = + le32_to_cpu(profile_v4_2->gb_vdroop_table_ckson_a1); + param->ulGbVdroopTableCksonA2 = + le32_to_cpu(profile_v4_2->gb_vdroop_table_ckson_a2); + param->ulGbFuseTableCksoffM1 = + le32_to_cpu(profile_v4_2->avfsgb_fuse_table_cksoff_m1); + param->ulGbFuseTableCksoffM2 = + le32_to_cpu(profile_v4_2->avfsgb_fuse_table_cksoff_m2); + param->ulGbFuseTableCksoffB = + le32_to_cpu(profile_v4_2->avfsgb_fuse_table_cksoff_b); + param->ulGbFuseTableCksonM1 = + le32_to_cpu(profile_v4_2->avfsgb_fuse_table_ckson_m1); + param->ulGbFuseTableCksonM2 = + le32_to_cpu(profile_v4_2->avfsgb_fuse_table_ckson_m2); + param->ulGbFuseTableCksonB = + le32_to_cpu(profile_v4_2->avfsgb_fuse_table_ckson_b); + + param->ucEnableGbVdroopTableCkson = + profile_v4_2->enable_gb_vdroop_table_ckson; + param->ucEnableGbFuseTableCkson = + profile_v4_2->enable_gb_fuse_table_ckson; + param->usPsmAgeComfactor = + le16_to_cpu(profile_v4_2->psm_age_comfactor); + + param->ulDispclk2GfxclkM1 = + le32_to_cpu(profile_v4_2->dispclk2gfxclk_a); + param->ulDispclk2GfxclkM2 = + le32_to_cpu(profile_v4_2->dispclk2gfxclk_b); + param->ulDispclk2GfxclkB = + le32_to_cpu(profile_v4_2->dispclk2gfxclk_c); + param->ulDcefclk2GfxclkM1 = + le32_to_cpu(profile_v4_2->dcefclk2gfxclk_a); + param->ulDcefclk2GfxclkM2 = + le32_to_cpu(profile_v4_2->dcefclk2gfxclk_b); + param->ulDcefclk2GfxclkB = + le32_to_cpu(profile_v4_2->dcefclk2gfxclk_c); + param->ulPixelclk2GfxclkM1 = + le32_to_cpu(profile_v4_2->pixclk2gfxclk_a); + param->ulPixelclk2GfxclkM2 = + le32_to_cpu(profile_v4_2->pixclk2gfxclk_b); + param->ulPixelclk2GfxclkB = + le32_to_cpu(profile_v4_2->pixclk2gfxclk_c); + param->ulPhyclk2GfxclkM1 = + le32_to_cpu(profile->phyclk2gfxclk_a); + param->ulPhyclk2GfxclkM2 = + le32_to_cpu(profile_v4_2->phyclk2gfxclk_b); + param->ulPhyclk2GfxclkB = + le32_to_cpu(profile_v4_2->phyclk2gfxclk_c); + param->ulAcgGbVdroopTableA0 = le32_to_cpu(profile_v4_2->acg_gb_vdroop_table_a0); + param->ulAcgGbVdroopTableA1 = le32_to_cpu(profile_v4_2->acg_gb_vdroop_table_a1); + param->ulAcgGbVdroopTableA2 = le32_to_cpu(profile_v4_2->acg_gb_vdroop_table_a2); + param->ulAcgGbFuseTableM1 = le32_to_cpu(profile_v4_2->acg_avfsgb_fuse_table_m1); + param->ulAcgGbFuseTableM2 = le32_to_cpu(profile_v4_2->acg_avfsgb_fuse_table_m2); + param->ulAcgGbFuseTableB = le32_to_cpu(profile_v4_2->acg_avfsgb_fuse_table_b); + param->ucAcgEnableGbVdroopTable = le32_to_cpu(profile_v4_2->enable_acg_gb_vdroop_table); + param->ucAcgEnableGbFuseTable = le32_to_cpu(profile_v4_2->enable_acg_gb_fuse_table); + } else { + pr_info("Invalid VBIOS AVFS ProfilingInfo Revision!\n"); + return -EINVAL; + } return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h index 81908b5cfd5f11f226df1b9c4054588802394701..8e6b1f0ddebc00769bee23d1b48162421a47d8b5 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h @@ -109,6 +109,14 @@ struct pp_atomfwctrl_avfs_parameters { uint32_t ulPhyclk2GfxclkM1; uint32_t ulPhyclk2GfxclkM2; uint32_t ulPhyclk2GfxclkB; + uint32_t ulAcgGbVdroopTableA0; + uint32_t ulAcgGbVdroopTableA1; + uint32_t ulAcgGbVdroopTableA2; + uint32_t ulAcgGbFuseTableM1; + uint32_t ulAcgGbFuseTableM2; + uint32_t ulAcgGbFuseTableB; + uint32_t ucAcgEnableGbVdroopTable; + uint32_t ucAcgEnableGbFuseTable; }; struct pp_atomfwctrl_gpio_parameters { diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c index 4c7f430b36eba81ab13d76932bcabec2b5e50ff7..edc5fb6412d95be624dd3ef7903602c01b27c51d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.c @@ -265,6 +265,15 @@ static int rv_tf_set_clock_limit(struct pp_hwmgr *hwmgr, void *input, } } */ + if (((hwmgr->uvd_arbiter.vclk_soft_min / 100) != rv_data->vclk_soft_min) || + ((hwmgr->uvd_arbiter.dclk_soft_min / 100) != rv_data->dclk_soft_min)) { + rv_data->vclk_soft_min = hwmgr->uvd_arbiter.vclk_soft_min / 100; + rv_data->dclk_soft_min = hwmgr->uvd_arbiter.dclk_soft_min / 100; + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetSoftMinVcn, + (rv_data->vclk_soft_min << 16) | rv_data->vclk_soft_min); + } + if((hwmgr->gfx_arbiter.sclk_hard_min != 0) && ((hwmgr->gfx_arbiter.sclk_hard_min / 100) != rv_data->soc_actual_hard_min_freq)) { smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.h index afb852295a1573d148fa6eaca387840f14033641..2472b50e54cfb67730176b47676c0ca1cc9f5163 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/rv_hwmgr.h @@ -280,6 +280,8 @@ struct rv_hwmgr { uint32_t f_actual_hard_min_freq; uint32_t fabric_actual_soft_min_freq; + uint32_t vclk_soft_min; + uint32_t dclk_soft_min; uint32_t gfx_actual_soft_min_freq; bool vcn_power_gated; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 1f01020ce3a92a34baa5b3d8257f974008143179..c2743233ba10ed8dd9b55338730fbff9722ae680 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -1962,9 +1962,6 @@ static int smu7_thermal_parameter_init(struct pp_hwmgr *hwmgr) temp_reg = PHM_SET_FIELD(temp_reg, CNB_PWRMGT_CNTL, DPM_ENABLED, 0x1); break; default: - PP_ASSERT_WITH_CODE(0, - "Failed to setup PCC HW register! Wrong GPIO assigned for VDDC_PCC_GPIO_PINID!", - ); break; } cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCNB_PWRMGT_CNTL, temp_reg); @@ -4630,6 +4627,15 @@ static int smu7_set_power_profile_state(struct pp_hwmgr *hwmgr, static int smu7_avfs_control(struct pp_hwmgr *hwmgr, bool enable) { + struct pp_smumgr *smumgr = (struct pp_smumgr *)(hwmgr->smumgr); + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(smumgr->backend); + + if (smu_data == NULL) + return -EINVAL; + + if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) + return 0; + if (enable) { if (!PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, FEATURE_STATUS, AVS_ON)) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 197174e562d208e773492f9da2280fd03414c540..9d71a259d97d46fc8008e288f71605f3cb5e23ff 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -78,6 +78,8 @@ uint32_t channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; #define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L #define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L #define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L +static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, uint32_t mask); const ULONG PhwVega10_Magic = (ULONG)(PHM_VIslands_Magic); @@ -146,6 +148,19 @@ static void vega10_set_default_registry_data(struct pp_hwmgr *hwmgr) data->registry_data.vr1hot_enabled = 1; data->registry_data.regulator_hot_gpio_support = 1; + data->registry_data.didt_support = 1; + if (data->registry_data.didt_support) { + data->registry_data.didt_mode = 6; + data->registry_data.sq_ramping_support = 1; + data->registry_data.db_ramping_support = 0; + data->registry_data.td_ramping_support = 0; + data->registry_data.tcp_ramping_support = 0; + data->registry_data.dbr_ramping_support = 0; + data->registry_data.edc_didt_support = 1; + data->registry_data.gc_didt_support = 0; + data->registry_data.psm_didt_support = 0; + } + data->display_voltage_mode = PPVEGA10_VEGA10DISPLAYVOLTAGEMODE_DFLT; data->dcef_clk_quad_eqn_a = PPREGKEY_VEGA10QUADRATICEQUATION_DFLT; data->dcef_clk_quad_eqn_b = PPREGKEY_VEGA10QUADRATICEQUATION_DFLT; @@ -222,6 +237,8 @@ static int vega10_set_features_platform_caps(struct pp_hwmgr *hwmgr) /* assume disabled */ phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PowerContainment); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DiDtSupport); phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SQRamping); phm_cap_unset(hwmgr->platform_descriptor.platformCaps, @@ -230,6 +247,34 @@ static int vega10_set_features_platform_caps(struct pp_hwmgr *hwmgr) PHM_PlatformCaps_TDRamping); phm_cap_unset(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TCPRamping); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DBRRamping); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_DiDtEDCEnable); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_GCEDC); + phm_cap_unset(hwmgr->platform_descriptor.platformCaps, + PHM_PlatformCaps_PSM); + + if (data->registry_data.didt_support) { + phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DiDtSupport); + if (data->registry_data.sq_ramping_support) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SQRamping); + if (data->registry_data.db_ramping_support) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DBRamping); + if (data->registry_data.td_ramping_support) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TDRamping); + if (data->registry_data.tcp_ramping_support) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TCPRamping); + if (data->registry_data.dbr_ramping_support) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DBRRamping); + if (data->registry_data.edc_didt_support) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DiDtEDCEnable); + if (data->registry_data.gc_didt_support) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_GCEDC); + if (data->registry_data.psm_didt_support) + phm_cap_set(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PSM); + } if (data->registry_data.power_containment_support) phm_cap_set(hwmgr->platform_descriptor.platformCaps, @@ -321,8 +366,8 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr) FEATURE_LED_DISPLAY_BIT; data->smu_features[GNLD_FAN_CONTROL].smu_feature_id = FEATURE_FAN_CONTROL_BIT; - data->smu_features[GNLD_VOLTAGE_CONTROLLER].smu_feature_id = - FEATURE_VOLTAGE_CONTROLLER_BIT; + data->smu_features[GNLD_ACG].smu_feature_id = FEATURE_ACG_BIT; + data->smu_features[GNLD_DIDT].smu_feature_id = FEATURE_GFX_EDC_BIT; if (!data->registry_data.prefetcher_dpm_key_disabled) data->smu_features[GNLD_DPM_PREFETCHER].supported = true; @@ -386,6 +431,15 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr) if (data->registry_data.vr0hot_enabled) data->smu_features[GNLD_VR0HOT].supported = true; + smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_GetSmuVersion); + vega10_read_arg_from_smc(hwmgr->smumgr, &(data->smu_version)); + /* ACG firmware has major version 5 */ + if ((data->smu_version & 0xff000000) == 0x5000000) + data->smu_features[GNLD_ACG].supported = true; + + if (data->registry_data.didt_support) + data->smu_features[GNLD_DIDT].supported = true; + } #ifdef PPLIB_VEGA10_EVV_SUPPORT @@ -2222,6 +2276,21 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m1_shift = 24; pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].m2_shift = 12; pp_table->DisplayClock2Gfxclk[DSPCLK_PHYCLK].b_shift = 12; + + pp_table->AcgBtcGbVdroopTable.a0 = avfs_params.ulAcgGbVdroopTableA0; + pp_table->AcgBtcGbVdroopTable.a0_shift = 20; + pp_table->AcgBtcGbVdroopTable.a1 = avfs_params.ulAcgGbVdroopTableA1; + pp_table->AcgBtcGbVdroopTable.a1_shift = 20; + pp_table->AcgBtcGbVdroopTable.a2 = avfs_params.ulAcgGbVdroopTableA2; + pp_table->AcgBtcGbVdroopTable.a2_shift = 20; + + pp_table->AcgAvfsGb.m1 = avfs_params.ulAcgGbFuseTableM1; + pp_table->AcgAvfsGb.m2 = avfs_params.ulAcgGbFuseTableM2; + pp_table->AcgAvfsGb.b = avfs_params.ulAcgGbFuseTableB; + pp_table->AcgAvfsGb.m1_shift = 0; + pp_table->AcgAvfsGb.m2_shift = 0; + pp_table->AcgAvfsGb.b_shift = 0; + } else { data->smu_features[GNLD_AVFS].supported = false; } @@ -2230,6 +2299,55 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) return 0; } +static int vega10_acg_enable(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = + (struct vega10_hwmgr *)(hwmgr->backend); + uint32_t agc_btc_response; + + if (data->smu_features[GNLD_ACG].supported) { + if (0 == vega10_enable_smc_features(hwmgr->smumgr, true, + data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_bitmap)) + data->smu_features[GNLD_DPM_PREFETCHER].enabled = true; + + smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_InitializeAcg); + + smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_RunAcgBtc); + vega10_read_arg_from_smc(hwmgr->smumgr, &agc_btc_response); + + if (1 == agc_btc_response) { + if (1 == data->acg_loop_state) + smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_RunAcgInClosedLoop); + else if (2 == data->acg_loop_state) + smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_RunAcgInOpenLoop); + if (0 == vega10_enable_smc_features(hwmgr->smumgr, true, + data->smu_features[GNLD_ACG].smu_feature_bitmap)) + data->smu_features[GNLD_ACG].enabled = true; + } else { + pr_info("[ACG_Enable] ACG BTC Returned Failed Status!\n"); + data->smu_features[GNLD_ACG].enabled = false; + } + } + + return 0; +} + +static int vega10_acg_disable(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = + (struct vega10_hwmgr *)(hwmgr->backend); + + if (data->smu_features[GNLD_ACG].supported) { + if (data->smu_features[GNLD_ACG].enabled) { + if (0 == vega10_enable_smc_features(hwmgr->smumgr, false, + data->smu_features[GNLD_ACG].smu_feature_bitmap)) + data->smu_features[GNLD_ACG].enabled = false; + } + } + + return 0; +} + static int vega10_populate_gpio_parameters(struct pp_hwmgr *hwmgr) { struct vega10_hwmgr *data = @@ -2404,6 +2522,9 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr) pp_table->DisplayDpmVoltageMode = (uint8_t)(table_info->uc_dcef_dpm_voltage_mode); + data->vddc_voltage_table.psi0_enable = voltage_table.psi0_enable; + data->vddc_voltage_table.psi1_enable = voltage_table.psi1_enable; + if (data->registry_data.ulv_support && table_info->us_ulv_voltage_offset) { result = vega10_populate_ulv_state(hwmgr); @@ -2500,7 +2621,7 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr) result = vega10_avfs_enable(hwmgr, true); PP_ASSERT_WITH_CODE(!result, "Attempt to enable AVFS feature Failed!", return result); - + vega10_acg_enable(hwmgr); vega10_save_default_power_profile(hwmgr); return 0; @@ -2832,6 +2953,11 @@ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE(!tmp_result, "Failed to start DPM!", result = tmp_result); + /* enable didt, do not abort if failed didt */ + tmp_result = vega10_enable_didt_config(hwmgr); + PP_ASSERT(!tmp_result, + "Failed to enable didt config!"); + tmp_result = vega10_enable_power_containment(hwmgr); PP_ASSERT_WITH_CODE(!tmp_result, "Failed to enable power containment!", @@ -3578,10 +3704,22 @@ static void vega10_apply_dal_minimum_voltage_request( return; } +static int vega10_get_soc_index_for_max_uclk(struct pp_hwmgr *hwmgr) +{ + struct phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table_on_mclk; + struct phm_ppt_v2_information *table_info = + (struct phm_ppt_v2_information *)(hwmgr->pptable); + + vdd_dep_table_on_mclk = table_info->vdd_dep_on_mclk; + + return vdd_dep_table_on_mclk->entries[NUM_UCLK_DPM_LEVELS - 1].vddInd + 1; +} + static int vega10_upload_dpm_bootup_level(struct pp_hwmgr *hwmgr) { struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); + uint32_t socclk_idx; vega10_apply_dal_minimum_voltage_request(hwmgr); @@ -3602,13 +3740,22 @@ static int vega10_upload_dpm_bootup_level(struct pp_hwmgr *hwmgr) if (!data->registry_data.mclk_dpm_key_disabled) { if (data->smc_state_table.mem_boot_level != data->dpm_table.mem_table.dpm_state.soft_min_level) { + if (data->smc_state_table.mem_boot_level == NUM_UCLK_DPM_LEVELS - 1) { + socclk_idx = vega10_get_soc_index_for_max_uclk(hwmgr); PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc_with_parameter( - hwmgr->smumgr, - PPSMC_MSG_SetSoftMinUclkByIndex, - data->smc_state_table.mem_boot_level), - "Failed to set soft min mclk index!", - return -EINVAL); - + hwmgr->smumgr, + PPSMC_MSG_SetSoftMinSocclkByIndex, + socclk_idx), + "Failed to set soft min uclk index!", + return -EINVAL); + } else { + PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc_with_parameter( + hwmgr->smumgr, + PPSMC_MSG_SetSoftMinUclkByIndex, + data->smc_state_table.mem_boot_level), + "Failed to set soft min uclk index!", + return -EINVAL); + } data->dpm_table.mem_table.dpm_state.soft_min_level = data->smc_state_table.mem_boot_level; } @@ -4015,7 +4162,7 @@ static int vega10_notify_smc_display_config_after_ps_adjustment( pr_info("Attempt to set Hard Min for DCEFCLK Failed!"); } } else { - pr_info("Cannot find requested DCEFCLK!"); + pr_debug("Cannot find requested DCEFCLK!"); } if (min_clocks.memoryClock != 0) { @@ -4097,34 +4244,30 @@ static int vega10_unforce_dpm_levels(struct pp_hwmgr *hwmgr) return 0; } -static int vega10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, - enum amd_dpm_forced_level level) +static int vega10_get_profiling_clk_mask(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level, + uint32_t *sclk_mask, uint32_t *mclk_mask, uint32_t *soc_mask) { - int ret = 0; + struct phm_ppt_v2_information *table_info = + (struct phm_ppt_v2_information *)(hwmgr->pptable); - switch (level) { - case AMD_DPM_FORCED_LEVEL_HIGH: - ret = vega10_force_dpm_highest(hwmgr); - if (ret) - return ret; - break; - case AMD_DPM_FORCED_LEVEL_LOW: - ret = vega10_force_dpm_lowest(hwmgr); - if (ret) - return ret; - break; - case AMD_DPM_FORCED_LEVEL_AUTO: - ret = vega10_unforce_dpm_levels(hwmgr); - if (ret) - return ret; - break; - default: - break; + if (table_info->vdd_dep_on_sclk->count > VEGA10_UMD_PSTATE_GFXCLK_LEVEL && + table_info->vdd_dep_on_socclk->count > VEGA10_UMD_PSTATE_SOCCLK_LEVEL && + table_info->vdd_dep_on_mclk->count > VEGA10_UMD_PSTATE_MCLK_LEVEL) { + *sclk_mask = VEGA10_UMD_PSTATE_GFXCLK_LEVEL; + *soc_mask = VEGA10_UMD_PSTATE_SOCCLK_LEVEL; + *mclk_mask = VEGA10_UMD_PSTATE_MCLK_LEVEL; } - hwmgr->dpm_level = level; - - return ret; + if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) { + *sclk_mask = 0; + } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) { + *mclk_mask = 0; + } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { + *sclk_mask = table_info->vdd_dep_on_sclk->count - 1; + *soc_mask = table_info->vdd_dep_on_socclk->count - 1; + *mclk_mask = table_info->vdd_dep_on_mclk->count - 1; + } + return 0; } static int vega10_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode) @@ -4151,6 +4294,86 @@ static int vega10_set_fan_control_mode(struct pp_hwmgr *hwmgr, uint32_t mode) return result; } +static int vega10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, + enum amd_dpm_forced_level level) +{ + int ret = 0; + uint32_t sclk_mask = 0; + uint32_t mclk_mask = 0; + uint32_t soc_mask = 0; + uint32_t profile_mode_mask = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; + + if (level == hwmgr->dpm_level) + return ret; + + if (!(hwmgr->dpm_level & profile_mode_mask)) { + /* enter profile mode, save current level, disable gfx cg*/ + if (level & profile_mode_mask) { + hwmgr->saved_dpm_level = hwmgr->dpm_level; + cgs_set_clockgating_state(hwmgr->device, + AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_UNGATE); + } + } else { + /* exit profile mode, restore level, enable gfx cg*/ + if (!(level & profile_mode_mask)) { + if (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT) + level = hwmgr->saved_dpm_level; + cgs_set_clockgating_state(hwmgr->device, + AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_GATE); + } + } + + switch (level) { + case AMD_DPM_FORCED_LEVEL_HIGH: + ret = vega10_force_dpm_highest(hwmgr); + if (ret) + return ret; + hwmgr->dpm_level = level; + break; + case AMD_DPM_FORCED_LEVEL_LOW: + ret = vega10_force_dpm_lowest(hwmgr); + if (ret) + return ret; + hwmgr->dpm_level = level; + break; + case AMD_DPM_FORCED_LEVEL_AUTO: + ret = vega10_unforce_dpm_levels(hwmgr); + if (ret) + return ret; + hwmgr->dpm_level = level; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + ret = vega10_get_profiling_clk_mask(hwmgr, level, &sclk_mask, &mclk_mask, &soc_mask); + if (ret) + return ret; + hwmgr->dpm_level = level; + vega10_force_clock_level(hwmgr, PP_SCLK, 1<dpm_level = level; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: + default: + break; + } + + if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK && hwmgr->saved_dpm_level != AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) + vega10_set_fan_control_mode(hwmgr, AMD_FAN_CTRL_NONE); + else if (level != AMD_DPM_FORCED_LEVEL_PROFILE_PEAK && hwmgr->saved_dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) + vega10_set_fan_control_mode(hwmgr, AMD_FAN_CTRL_AUTO); + + return 0; +} + static int vega10_get_fan_control_mode(struct pp_hwmgr *hwmgr) { struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); @@ -4396,7 +4619,9 @@ static int vega10_force_clock_level(struct pp_hwmgr *hwmgr, struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); int i; - if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) + if (hwmgr->dpm_level & (AMD_DPM_FORCED_LEVEL_AUTO | + AMD_DPM_FORCED_LEVEL_LOW | + AMD_DPM_FORCED_LEVEL_HIGH)) return -EINVAL; switch (type) { @@ -4661,6 +4886,10 @@ static int vega10_disable_dpm_tasks(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((tmp_result == 0), "Failed to disable power containment!", result = tmp_result); + tmp_result = vega10_disable_didt_config(hwmgr); + PP_ASSERT_WITH_CODE((tmp_result == 0), + "Failed to disable didt config!", result = tmp_result); + tmp_result = vega10_avfs_enable(hwmgr, false); PP_ASSERT_WITH_CODE((tmp_result == 0), "Failed to disable AVFS!", result = tmp_result); @@ -4677,6 +4906,9 @@ static int vega10_disable_dpm_tasks(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((tmp_result == 0), "Failed to disable ulv!", result = tmp_result); + tmp_result = vega10_acg_disable(hwmgr); + PP_ASSERT_WITH_CODE((tmp_result == 0), + "Failed to disable acg!", result = tmp_result); return result; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h index 6e5c5b99593bcb8922546dc4c02983b88019d46b..676cd77358837ad9c604c93352808285049e13a9 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h @@ -64,7 +64,9 @@ enum { GNLD_FW_CTF, GNLD_LED_DISPLAY, GNLD_FAN_CONTROL, - GNLD_VOLTAGE_CONTROLLER, + GNLD_FEATURE_FAST_PPT_BIT, + GNLD_DIDT, + GNLD_ACG, GNLD_FEATURES_MAX }; @@ -230,7 +232,9 @@ struct vega10_registry_data { uint8_t cac_support; uint8_t clock_stretcher_support; uint8_t db_ramping_support; + uint8_t didt_mode; uint8_t didt_support; + uint8_t edc_didt_support; uint8_t dynamic_state_patching_support; uint8_t enable_pkg_pwr_tracking_feature; uint8_t enable_tdc_limit_feature; @@ -263,6 +267,9 @@ struct vega10_registry_data { uint8_t tcp_ramping_support; uint8_t tdc_support; uint8_t td_ramping_support; + uint8_t dbr_ramping_support; + uint8_t gc_didt_support; + uint8_t psm_didt_support; uint8_t thermal_out_gpio_support; uint8_t thermal_support; uint8_t fw_ctf_enabled; @@ -381,6 +388,8 @@ struct vega10_hwmgr { struct vega10_smc_state_table smc_state_table; uint32_t config_telemetry; + uint32_t smu_version; + uint32_t acg_loop_state; }; #define VEGA10_DPM2_NEAR_TDP_DEC 10 @@ -425,6 +434,10 @@ struct vega10_hwmgr { #define PPVEGA10_VEGA10UCLKCLKAVERAGEALPHA_DFLT 25 /* 10% * 255 = 25 */ #define PPVEGA10_VEGA10GFXACTIVITYAVERAGEALPHA_DFLT 25 /* 10% * 255 = 25 */ +#define VEGA10_UMD_PSTATE_GFXCLK_LEVEL 0x3 +#define VEGA10_UMD_PSTATE_SOCCLK_LEVEL 0x3 +#define VEGA10_UMD_PSTATE_MCLK_LEVEL 0x2 + extern int tonga_initializa_dynamic_state_adjustment_rule_settings(struct pp_hwmgr *hwmgr); extern int tonga_hwmgr_backend_fini(struct pp_hwmgr *hwmgr); extern int tonga_get_mc_microcode_version (struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c index 3f72268e99bb812d780ba1b9b4f52b2211ee3445..e7fa67063cdcbf2b5257b7a86ce58f1c087341ef 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c @@ -26,7 +26,1298 @@ #include "vega10_powertune.h" #include "vega10_smumgr.h" #include "vega10_ppsmc.h" +#include "vega10_inc.h" #include "pp_debug.h" +#include "pp_soc15.h" + +static const struct vega10_didt_config_reg SEDiDtTuningCtrlConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* DIDT_SQ */ + { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x3853 }, + { ixDIDT_SQ_TUNING_CTRL, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x3153 }, + + /* DIDT_TD */ + { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x0dde }, + { ixDIDT_TD_TUNING_CTRL, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x0dde }, + + /* DIDT_TCP */ + { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x3dde }, + { ixDIDT_TCP_TUNING_CTRL, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x3dde }, + + /* DIDT_DB */ + { ixDIDT_DB_TUNING_CTRL, DIDT_DB_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK, DIDT_DB_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT, 0x3dde }, + { ixDIDT_DB_TUNING_CTRL, DIDT_DB_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK, DIDT_DB_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT, 0x3dde }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg SEDiDtCtrl3Config_vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /*DIDT_SQ_CTRL3 */ + { ixDIDT_SQ_CTRL3, DIDT_SQ_CTRL3__GC_DIDT_ENABLE_MASK, DIDT_SQ_CTRL3__GC_DIDT_ENABLE__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL3, DIDT_SQ_CTRL3__GC_DIDT_CLK_EN_OVERRIDE_MASK, DIDT_SQ_CTRL3__GC_DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL3, DIDT_SQ_CTRL3__THROTTLE_POLICY_MASK, DIDT_SQ_CTRL3__THROTTLE_POLICY__SHIFT, 0x0003 }, + { ixDIDT_SQ_CTRL3, DIDT_SQ_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT_MASK, DIDT_SQ_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL3, DIDT_SQ_CTRL3__DIDT_POWER_LEVEL_LOWBIT_MASK, DIDT_SQ_CTRL3__DIDT_POWER_LEVEL_LOWBIT__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL3, DIDT_SQ_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS_MASK, DIDT_SQ_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS__SHIFT, 0x0003 }, + { ixDIDT_SQ_CTRL3, DIDT_SQ_CTRL3__GC_DIDT_LEVEL_COMB_EN_MASK, DIDT_SQ_CTRL3__GC_DIDT_LEVEL_COMB_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL3, DIDT_SQ_CTRL3__SE_DIDT_LEVEL_COMB_EN_MASK, DIDT_SQ_CTRL3__SE_DIDT_LEVEL_COMB_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL3, DIDT_SQ_CTRL3__QUALIFY_STALL_EN_MASK, DIDT_SQ_CTRL3__QUALIFY_STALL_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL3, DIDT_SQ_CTRL3__DIDT_STALL_SEL_MASK, DIDT_SQ_CTRL3__DIDT_STALL_SEL__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL3, DIDT_SQ_CTRL3__DIDT_FORCE_STALL_MASK, DIDT_SQ_CTRL3__DIDT_FORCE_STALL__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL3, DIDT_SQ_CTRL3__DIDT_STALL_DELAY_EN_MASK, DIDT_SQ_CTRL3__DIDT_STALL_DELAY_EN__SHIFT, 0x0000 }, + + /*DIDT_TCP_CTRL3 */ + { ixDIDT_TCP_CTRL3, DIDT_TCP_CTRL3__GC_DIDT_ENABLE_MASK, DIDT_TCP_CTRL3__GC_DIDT_ENABLE__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL3, DIDT_TCP_CTRL3__GC_DIDT_CLK_EN_OVERRIDE_MASK, DIDT_TCP_CTRL3__GC_DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL3, DIDT_TCP_CTRL3__THROTTLE_POLICY_MASK, DIDT_TCP_CTRL3__THROTTLE_POLICY__SHIFT, 0x0003 }, + { ixDIDT_TCP_CTRL3, DIDT_TCP_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT_MASK, DIDT_TCP_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL3, DIDT_TCP_CTRL3__DIDT_POWER_LEVEL_LOWBIT_MASK, DIDT_TCP_CTRL3__DIDT_POWER_LEVEL_LOWBIT__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL3, DIDT_TCP_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS_MASK, DIDT_TCP_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS__SHIFT, 0x0003 }, + { ixDIDT_TCP_CTRL3, DIDT_TCP_CTRL3__GC_DIDT_LEVEL_COMB_EN_MASK, DIDT_TCP_CTRL3__GC_DIDT_LEVEL_COMB_EN__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL3, DIDT_TCP_CTRL3__SE_DIDT_LEVEL_COMB_EN_MASK, DIDT_TCP_CTRL3__SE_DIDT_LEVEL_COMB_EN__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL3, DIDT_TCP_CTRL3__QUALIFY_STALL_EN_MASK, DIDT_TCP_CTRL3__QUALIFY_STALL_EN__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL3, DIDT_TCP_CTRL3__DIDT_STALL_SEL_MASK, DIDT_TCP_CTRL3__DIDT_STALL_SEL__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL3, DIDT_TCP_CTRL3__DIDT_FORCE_STALL_MASK, DIDT_TCP_CTRL3__DIDT_FORCE_STALL__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL3, DIDT_TCP_CTRL3__DIDT_STALL_DELAY_EN_MASK, DIDT_TCP_CTRL3__DIDT_STALL_DELAY_EN__SHIFT, 0x0000 }, + + /*DIDT_TD_CTRL3 */ + { ixDIDT_TD_CTRL3, DIDT_TD_CTRL3__GC_DIDT_ENABLE_MASK, DIDT_TD_CTRL3__GC_DIDT_ENABLE__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL3, DIDT_TD_CTRL3__GC_DIDT_CLK_EN_OVERRIDE_MASK, DIDT_TD_CTRL3__GC_DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL3, DIDT_TD_CTRL3__THROTTLE_POLICY_MASK, DIDT_TD_CTRL3__THROTTLE_POLICY__SHIFT, 0x0003 }, + { ixDIDT_TD_CTRL3, DIDT_TD_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT_MASK, DIDT_TD_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL3, DIDT_TD_CTRL3__DIDT_POWER_LEVEL_LOWBIT_MASK, DIDT_TD_CTRL3__DIDT_POWER_LEVEL_LOWBIT__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL3, DIDT_TD_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS_MASK, DIDT_TD_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS__SHIFT, 0x0003 }, + { ixDIDT_TD_CTRL3, DIDT_TD_CTRL3__GC_DIDT_LEVEL_COMB_EN_MASK, DIDT_TD_CTRL3__GC_DIDT_LEVEL_COMB_EN__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL3, DIDT_TD_CTRL3__SE_DIDT_LEVEL_COMB_EN_MASK, DIDT_TD_CTRL3__SE_DIDT_LEVEL_COMB_EN__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL3, DIDT_TD_CTRL3__QUALIFY_STALL_EN_MASK, DIDT_TD_CTRL3__QUALIFY_STALL_EN__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL3, DIDT_TD_CTRL3__DIDT_STALL_SEL_MASK, DIDT_TD_CTRL3__DIDT_STALL_SEL__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL3, DIDT_TD_CTRL3__DIDT_FORCE_STALL_MASK, DIDT_TD_CTRL3__DIDT_FORCE_STALL__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL3, DIDT_TD_CTRL3__DIDT_STALL_DELAY_EN_MASK, DIDT_TD_CTRL3__DIDT_STALL_DELAY_EN__SHIFT, 0x0000 }, + + /*DIDT_DB_CTRL3 */ + { ixDIDT_DB_CTRL3, DIDT_DB_CTRL3__GC_DIDT_ENABLE_MASK, DIDT_DB_CTRL3__GC_DIDT_ENABLE__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL3, DIDT_DB_CTRL3__GC_DIDT_CLK_EN_OVERRIDE_MASK, DIDT_DB_CTRL3__GC_DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL3, DIDT_DB_CTRL3__THROTTLE_POLICY_MASK, DIDT_DB_CTRL3__THROTTLE_POLICY__SHIFT, 0x0003 }, + { ixDIDT_DB_CTRL3, DIDT_DB_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT_MASK, DIDT_DB_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL3, DIDT_DB_CTRL3__DIDT_POWER_LEVEL_LOWBIT_MASK, DIDT_DB_CTRL3__DIDT_POWER_LEVEL_LOWBIT__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL3, DIDT_DB_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS_MASK, DIDT_DB_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS__SHIFT, 0x0003 }, + { ixDIDT_DB_CTRL3, DIDT_DB_CTRL3__GC_DIDT_LEVEL_COMB_EN_MASK, DIDT_DB_CTRL3__GC_DIDT_LEVEL_COMB_EN__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL3, DIDT_DB_CTRL3__SE_DIDT_LEVEL_COMB_EN_MASK, DIDT_DB_CTRL3__SE_DIDT_LEVEL_COMB_EN__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL3, DIDT_DB_CTRL3__QUALIFY_STALL_EN_MASK, DIDT_DB_CTRL3__QUALIFY_STALL_EN__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL3, DIDT_DB_CTRL3__DIDT_STALL_SEL_MASK, DIDT_DB_CTRL3__DIDT_STALL_SEL__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL3, DIDT_DB_CTRL3__DIDT_FORCE_STALL_MASK, DIDT_DB_CTRL3__DIDT_FORCE_STALL__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL3, DIDT_DB_CTRL3__DIDT_STALL_DELAY_EN_MASK, DIDT_DB_CTRL3__DIDT_STALL_DELAY_EN__SHIFT, 0x0000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg SEDiDtCtrl2Config_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* DIDT_SQ */ + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK, DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3853 }, + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x00c0 }, + { ixDIDT_SQ_CTRL2, DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0000 }, + + /* DIDT_TD */ + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK, DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3fff }, + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x00c0 }, + { ixDIDT_TD_CTRL2, DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0001 }, + + /* DIDT_TCP */ + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__MAX_POWER_DELTA_MASK, DIDT_TCP_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3dde }, + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x00c0 }, + { ixDIDT_TCP_CTRL2, DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0001 }, + + /* DIDT_DB */ + { ixDIDT_DB_CTRL2, DIDT_DB_CTRL2__MAX_POWER_DELTA_MASK, DIDT_DB_CTRL2__MAX_POWER_DELTA__SHIFT, 0x3dde }, + { ixDIDT_DB_CTRL2, DIDT_DB_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK, DIDT_DB_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT, 0x00c0 }, + { ixDIDT_DB_CTRL2, DIDT_DB_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK, DIDT_DB_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT, 0x0001 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg SEDiDtCtrl1Config_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* DIDT_SQ */ + { ixDIDT_SQ_CTRL1, DIDT_SQ_CTRL1__MIN_POWER_MASK, DIDT_SQ_CTRL1__MIN_POWER__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL1, DIDT_SQ_CTRL1__MAX_POWER_MASK, DIDT_SQ_CTRL1__MAX_POWER__SHIFT, 0xffff }, + /* DIDT_TD */ + { ixDIDT_TD_CTRL1, DIDT_TD_CTRL1__MIN_POWER_MASK, DIDT_TD_CTRL1__MIN_POWER__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL1, DIDT_TD_CTRL1__MAX_POWER_MASK, DIDT_TD_CTRL1__MAX_POWER__SHIFT, 0xffff }, + /* DIDT_TCP */ + { ixDIDT_TCP_CTRL1, DIDT_TCP_CTRL1__MIN_POWER_MASK, DIDT_TCP_CTRL1__MIN_POWER__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL1, DIDT_TCP_CTRL1__MAX_POWER_MASK, DIDT_TCP_CTRL1__MAX_POWER__SHIFT, 0xffff }, + /* DIDT_DB */ + { ixDIDT_DB_CTRL1, DIDT_DB_CTRL1__MIN_POWER_MASK, DIDT_DB_CTRL1__MIN_POWER__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL1, DIDT_DB_CTRL1__MAX_POWER_MASK, DIDT_DB_CTRL1__MAX_POWER__SHIFT, 0xffff }, + + { 0xFFFFFFFF } /* End of list */ +}; + + +static const struct vega10_didt_config_reg SEDiDtWeightConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* DIDT_SQ */ + { ixDIDT_SQ_WEIGHT0_3, 0xFFFFFFFF, 0, 0x2B363B1A }, + { ixDIDT_SQ_WEIGHT4_7, 0xFFFFFFFF, 0, 0x270B2432 }, + { ixDIDT_SQ_WEIGHT8_11, 0xFFFFFFFF, 0, 0x00000018 }, + + /* DIDT_TD */ + { ixDIDT_TD_WEIGHT0_3, 0xFFFFFFFF, 0, 0x2B1D220F }, + { ixDIDT_TD_WEIGHT4_7, 0xFFFFFFFF, 0, 0x00007558 }, + { ixDIDT_TD_WEIGHT8_11, 0xFFFFFFFF, 0, 0x00000000 }, + + /* DIDT_TCP */ + { ixDIDT_TCP_WEIGHT0_3, 0xFFFFFFFF, 0, 0x5ACE160D }, + { ixDIDT_TCP_WEIGHT4_7, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TCP_WEIGHT8_11, 0xFFFFFFFF, 0, 0x00000000 }, + + /* DIDT_DB */ + { ixDIDT_DB_WEIGHT0_3, 0xFFFFFFFF, 0, 0x0E152A0F }, + { ixDIDT_DB_WEIGHT4_7, 0xFFFFFFFF, 0, 0x09061813 }, + { ixDIDT_DB_WEIGHT8_11, 0xFFFFFFFF, 0, 0x00000013 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg SEDiDtCtrl0Config_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* DIDT_SQ */ + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK, DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__PHASE_OFFSET_MASK, DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK, DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_STALL_CTRL_EN_MASK, DIDT_SQ_CTRL0__DIDT_STALL_CTRL_EN__SHIFT, 0x0001 }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_TUNING_CTRL_EN_MASK, DIDT_SQ_CTRL0__DIDT_TUNING_CTRL_EN__SHIFT, 0x0001 }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_STALL_AUTO_RELEASE_EN_MASK, DIDT_SQ_CTRL0__DIDT_STALL_AUTO_RELEASE_EN__SHIFT, 0x0001 }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_SQ_CTRL0__DIDT_HI_POWER_THRESHOLD__SHIFT, 0xffff }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_AUTO_MPD_EN_MASK, DIDT_SQ_CTRL0__DIDT_AUTO_MPD_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_STALL_EVENT_EN_MASK, DIDT_SQ_CTRL0__DIDT_STALL_EVENT_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_CTRL0, DIDT_SQ_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR_MASK, DIDT_SQ_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR__SHIFT, 0x0000 }, + /* DIDT_TD */ + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK, DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__PHASE_OFFSET_MASK, DIDT_TD_CTRL0__PHASE_OFFSET__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_CTRL_RST_MASK, DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_STALL_CTRL_EN_MASK, DIDT_TD_CTRL0__DIDT_STALL_CTRL_EN__SHIFT, 0x0001 }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_TUNING_CTRL_EN_MASK, DIDT_TD_CTRL0__DIDT_TUNING_CTRL_EN__SHIFT, 0x0001 }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_STALL_AUTO_RELEASE_EN_MASK, DIDT_TD_CTRL0__DIDT_STALL_AUTO_RELEASE_EN__SHIFT, 0x0001 }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_TD_CTRL0__DIDT_HI_POWER_THRESHOLD__SHIFT, 0xffff }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_AUTO_MPD_EN_MASK, DIDT_TD_CTRL0__DIDT_AUTO_MPD_EN__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_STALL_EVENT_EN_MASK, DIDT_TD_CTRL0__DIDT_STALL_EVENT_EN__SHIFT, 0x0000 }, + { ixDIDT_TD_CTRL0, DIDT_TD_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR_MASK, DIDT_TD_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR__SHIFT, 0x0000 }, + /* DIDT_TCP */ + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK, DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__PHASE_OFFSET_MASK, DIDT_TCP_CTRL0__PHASE_OFFSET__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_CTRL_RST_MASK, DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_STALL_CTRL_EN_MASK, DIDT_TCP_CTRL0__DIDT_STALL_CTRL_EN__SHIFT, 0x0001 }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_TUNING_CTRL_EN_MASK, DIDT_TCP_CTRL0__DIDT_TUNING_CTRL_EN__SHIFT, 0x0001 }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_STALL_AUTO_RELEASE_EN_MASK, DIDT_TCP_CTRL0__DIDT_STALL_AUTO_RELEASE_EN__SHIFT, 0x0001 }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_TCP_CTRL0__DIDT_HI_POWER_THRESHOLD__SHIFT, 0xffff }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_AUTO_MPD_EN_MASK, DIDT_TCP_CTRL0__DIDT_AUTO_MPD_EN__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_STALL_EVENT_EN_MASK, DIDT_TCP_CTRL0__DIDT_STALL_EVENT_EN__SHIFT, 0x0000 }, + { ixDIDT_TCP_CTRL0, DIDT_TCP_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR_MASK, DIDT_TCP_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR__SHIFT, 0x0000 }, + /* DIDT_DB */ + { ixDIDT_DB_CTRL0, DIDT_DB_CTRL0__DIDT_CTRL_EN_MASK, DIDT_DB_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL0, DIDT_DB_CTRL0__PHASE_OFFSET_MASK, DIDT_DB_CTRL0__PHASE_OFFSET__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL0, DIDT_DB_CTRL0__DIDT_CTRL_RST_MASK, DIDT_DB_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL0, DIDT_DB_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, DIDT_DB_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL0, DIDT_DB_CTRL0__DIDT_STALL_CTRL_EN_MASK, DIDT_DB_CTRL0__DIDT_STALL_CTRL_EN__SHIFT, 0x0001 }, + { ixDIDT_DB_CTRL0, DIDT_DB_CTRL0__DIDT_TUNING_CTRL_EN_MASK, DIDT_DB_CTRL0__DIDT_TUNING_CTRL_EN__SHIFT, 0x0001 }, + { ixDIDT_DB_CTRL0, DIDT_DB_CTRL0__DIDT_STALL_AUTO_RELEASE_EN_MASK, DIDT_DB_CTRL0__DIDT_STALL_AUTO_RELEASE_EN__SHIFT, 0x0001 }, + { ixDIDT_DB_CTRL0, DIDT_DB_CTRL0__DIDT_HI_POWER_THRESHOLD_MASK, DIDT_DB_CTRL0__DIDT_HI_POWER_THRESHOLD__SHIFT, 0xffff }, + { ixDIDT_DB_CTRL0, DIDT_DB_CTRL0__DIDT_AUTO_MPD_EN_MASK, DIDT_DB_CTRL0__DIDT_AUTO_MPD_EN__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL0, DIDT_DB_CTRL0__DIDT_STALL_EVENT_EN_MASK, DIDT_DB_CTRL0__DIDT_STALL_EVENT_EN__SHIFT, 0x0000 }, + { ixDIDT_DB_CTRL0, DIDT_DB_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR_MASK, DIDT_DB_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR__SHIFT, 0x0000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + + +static const struct vega10_didt_config_reg SEDiDtStallCtrlConfig_vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* DIDT_SQ */ + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0004 }, + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0004 }, + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_SQ_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x000a }, + { ixDIDT_SQ_STALL_CTRL, DIDT_SQ_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_SQ_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x000a }, + + /* DIDT_TD */ + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0001 }, + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0001 }, + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_TD_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x000a }, + { ixDIDT_TD_STALL_CTRL, DIDT_TD_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_TD_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x000a }, + + /* DIDT_TCP */ + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0001 }, + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0001 }, + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_TCP_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x000a }, + { ixDIDT_TCP_STALL_CTRL, DIDT_TCP_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_TCP_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x000a }, + + /* DIDT_DB */ + { ixDIDT_DB_STALL_CTRL, DIDT_DB_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK, DIDT_DB_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT, 0x0004 }, + { ixDIDT_DB_STALL_CTRL, DIDT_DB_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK, DIDT_DB_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT, 0x0004 }, + { ixDIDT_DB_STALL_CTRL, DIDT_DB_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI_MASK, DIDT_DB_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT, 0x000a }, + { ixDIDT_DB_STALL_CTRL, DIDT_DB_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO_MASK, DIDT_DB_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT, 0x000a }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg SEDiDtStallPatternConfig_vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* DIDT_SQ_STALL_PATTERN_1_2 */ + { ixDIDT_SQ_STALL_PATTERN_1_2, DIDT_SQ_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1_MASK, DIDT_SQ_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1__SHIFT, 0x0001 }, + { ixDIDT_SQ_STALL_PATTERN_1_2, DIDT_SQ_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2_MASK, DIDT_SQ_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2__SHIFT, 0x0001 }, + + /* DIDT_SQ_STALL_PATTERN_3_4 */ + { ixDIDT_SQ_STALL_PATTERN_3_4, DIDT_SQ_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3_MASK, DIDT_SQ_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3__SHIFT, 0x0001 }, + { ixDIDT_SQ_STALL_PATTERN_3_4, DIDT_SQ_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4_MASK, DIDT_SQ_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4__SHIFT, 0x0001 }, + + /* DIDT_SQ_STALL_PATTERN_5_6 */ + { ixDIDT_SQ_STALL_PATTERN_5_6, DIDT_SQ_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5_MASK, DIDT_SQ_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5__SHIFT, 0x0000 }, + { ixDIDT_SQ_STALL_PATTERN_5_6, DIDT_SQ_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6_MASK, DIDT_SQ_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6__SHIFT, 0x0000 }, + + /* DIDT_SQ_STALL_PATTERN_7 */ + { ixDIDT_SQ_STALL_PATTERN_7, DIDT_SQ_STALL_PATTERN_7__DIDT_STALL_PATTERN_7_MASK, DIDT_SQ_STALL_PATTERN_7__DIDT_STALL_PATTERN_7__SHIFT, 0x0000 }, + + /* DIDT_TCP_STALL_PATTERN_1_2 */ + { ixDIDT_TCP_STALL_PATTERN_1_2, DIDT_TCP_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1_MASK, DIDT_TCP_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1__SHIFT, 0x0001 }, + { ixDIDT_TCP_STALL_PATTERN_1_2, DIDT_TCP_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2_MASK, DIDT_TCP_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2__SHIFT, 0x0001 }, + + /* DIDT_TCP_STALL_PATTERN_3_4 */ + { ixDIDT_TCP_STALL_PATTERN_3_4, DIDT_TCP_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3_MASK, DIDT_TCP_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3__SHIFT, 0x0001 }, + { ixDIDT_TCP_STALL_PATTERN_3_4, DIDT_TCP_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4_MASK, DIDT_TCP_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4__SHIFT, 0x0001 }, + + /* DIDT_TCP_STALL_PATTERN_5_6 */ + { ixDIDT_TCP_STALL_PATTERN_5_6, DIDT_TCP_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5_MASK, DIDT_TCP_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5__SHIFT, 0x0000 }, + { ixDIDT_TCP_STALL_PATTERN_5_6, DIDT_TCP_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6_MASK, DIDT_TCP_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6__SHIFT, 0x0000 }, + + /* DIDT_TCP_STALL_PATTERN_7 */ + { ixDIDT_TCP_STALL_PATTERN_7, DIDT_TCP_STALL_PATTERN_7__DIDT_STALL_PATTERN_7_MASK, DIDT_TCP_STALL_PATTERN_7__DIDT_STALL_PATTERN_7__SHIFT, 0x0000 }, + + /* DIDT_TD_STALL_PATTERN_1_2 */ + { ixDIDT_TD_STALL_PATTERN_1_2, DIDT_TD_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1_MASK, DIDT_TD_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1__SHIFT, 0x0001 }, + { ixDIDT_TD_STALL_PATTERN_1_2, DIDT_TD_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2_MASK, DIDT_TD_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2__SHIFT, 0x0001 }, + + /* DIDT_TD_STALL_PATTERN_3_4 */ + { ixDIDT_TD_STALL_PATTERN_3_4, DIDT_TD_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3_MASK, DIDT_TD_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3__SHIFT, 0x0001 }, + { ixDIDT_TD_STALL_PATTERN_3_4, DIDT_TD_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4_MASK, DIDT_TD_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4__SHIFT, 0x0001 }, + + /* DIDT_TD_STALL_PATTERN_5_6 */ + { ixDIDT_TD_STALL_PATTERN_5_6, DIDT_TD_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5_MASK, DIDT_TD_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5__SHIFT, 0x0000 }, + { ixDIDT_TD_STALL_PATTERN_5_6, DIDT_TD_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6_MASK, DIDT_TD_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6__SHIFT, 0x0000 }, + + /* DIDT_TD_STALL_PATTERN_7 */ + { ixDIDT_TD_STALL_PATTERN_7, DIDT_TD_STALL_PATTERN_7__DIDT_STALL_PATTERN_7_MASK, DIDT_TD_STALL_PATTERN_7__DIDT_STALL_PATTERN_7__SHIFT, 0x0000 }, + + /* DIDT_DB_STALL_PATTERN_1_2 */ + { ixDIDT_DB_STALL_PATTERN_1_2, DIDT_DB_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1_MASK, DIDT_DB_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1__SHIFT, 0x0001 }, + { ixDIDT_DB_STALL_PATTERN_1_2, DIDT_DB_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2_MASK, DIDT_DB_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2__SHIFT, 0x0001 }, + + /* DIDT_DB_STALL_PATTERN_3_4 */ + { ixDIDT_DB_STALL_PATTERN_3_4, DIDT_DB_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3_MASK, DIDT_DB_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3__SHIFT, 0x0001 }, + { ixDIDT_DB_STALL_PATTERN_3_4, DIDT_DB_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4_MASK, DIDT_DB_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4__SHIFT, 0x0001 }, + + /* DIDT_DB_STALL_PATTERN_5_6 */ + { ixDIDT_DB_STALL_PATTERN_5_6, DIDT_DB_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5_MASK, DIDT_DB_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5__SHIFT, 0x0000 }, + { ixDIDT_DB_STALL_PATTERN_5_6, DIDT_DB_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6_MASK, DIDT_DB_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6__SHIFT, 0x0000 }, + + /* DIDT_DB_STALL_PATTERN_7 */ + { ixDIDT_DB_STALL_PATTERN_7, DIDT_DB_STALL_PATTERN_7__DIDT_STALL_PATTERN_7_MASK, DIDT_DB_STALL_PATTERN_7__DIDT_STALL_PATTERN_7__SHIFT, 0x0000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg SELCacConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* SQ */ + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x00060021 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x00860021 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x01060021 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x01860021 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x02060021 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x02860021 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x03060021 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x03860021 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x04060021 }, + /* TD */ + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x000E0020 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x008E0020 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x010E0020 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x018E0020 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x020E0020 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x028E0020 }, + /* TCP */ + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x001c0020 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x009c0020 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x011c0020 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x019c0020 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x021c0020 }, + /* DB */ + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x00200008 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x00820008 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x01020008 }, + { ixSE_CAC_CNTL, 0xFFFFFFFF, 0, 0x01820008 }, + + { 0xFFFFFFFF } /* End of list */ +}; + + +static const struct vega10_didt_config_reg SEEDCStallPatternConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* SQ */ + { ixDIDT_SQ_EDC_STALL_PATTERN_1_2, 0xFFFFFFFF, 0, 0x00030001 }, + { ixDIDT_SQ_EDC_STALL_PATTERN_3_4, 0xFFFFFFFF, 0, 0x000F0007 }, + { ixDIDT_SQ_EDC_STALL_PATTERN_5_6, 0xFFFFFFFF, 0, 0x003F001F }, + { ixDIDT_SQ_EDC_STALL_PATTERN_7, 0xFFFFFFFF, 0, 0x0000007F }, + /* TD */ + { ixDIDT_TD_EDC_STALL_PATTERN_1_2, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TD_EDC_STALL_PATTERN_3_4, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TD_EDC_STALL_PATTERN_5_6, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TD_EDC_STALL_PATTERN_7, 0xFFFFFFFF, 0, 0x00000000 }, + /* TCP */ + { ixDIDT_TCP_EDC_STALL_PATTERN_1_2, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TCP_EDC_STALL_PATTERN_3_4, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TCP_EDC_STALL_PATTERN_5_6, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TCP_EDC_STALL_PATTERN_7, 0xFFFFFFFF, 0, 0x00000000 }, + /* DB */ + { ixDIDT_DB_EDC_STALL_PATTERN_1_2, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_DB_EDC_STALL_PATTERN_3_4, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_DB_EDC_STALL_PATTERN_5_6, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_DB_EDC_STALL_PATTERN_7, 0xFFFFFFFF, 0, 0x00000000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg SEEDCForceStallPatternConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* SQ */ + { ixDIDT_SQ_EDC_STALL_PATTERN_1_2, 0xFFFFFFFF, 0, 0x00000015 }, + { ixDIDT_SQ_EDC_STALL_PATTERN_3_4, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_SQ_EDC_STALL_PATTERN_5_6, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_SQ_EDC_STALL_PATTERN_7, 0xFFFFFFFF, 0, 0x00000000 }, + /* TD */ + { ixDIDT_TD_EDC_STALL_PATTERN_1_2, 0xFFFFFFFF, 0, 0x00000015 }, + { ixDIDT_TD_EDC_STALL_PATTERN_3_4, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TD_EDC_STALL_PATTERN_5_6, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TD_EDC_STALL_PATTERN_7, 0xFFFFFFFF, 0, 0x00000000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg SEEDCStallDelayConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* SQ */ + { ixDIDT_SQ_EDC_STALL_DELAY_1, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_2, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_3, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_4, 0xFFFFFFFF, 0, 0x00000000 }, + /* TD */ + { ixDIDT_TD_EDC_STALL_DELAY_1, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TD_EDC_STALL_DELAY_2, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TD_EDC_STALL_DELAY_3, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TD_EDC_STALL_DELAY_4, 0xFFFFFFFF, 0, 0x00000000 }, + /* TCP */ + { ixDIDT_TCP_EDC_STALL_DELAY_1, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TCP_EDC_STALL_DELAY_2, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TCP_EDC_STALL_DELAY_3, 0xFFFFFFFF, 0, 0x00000000 }, + { ixDIDT_TCP_EDC_STALL_DELAY_4, 0xFFFFFFFF, 0, 0x00000000 }, + /* DB */ + { ixDIDT_DB_EDC_STALL_DELAY_1, 0xFFFFFFFF, 0, 0x00000000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg SEEDCThresholdConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + { ixDIDT_SQ_EDC_THRESHOLD, 0xFFFFFFFF, 0, 0x0000010E }, + { ixDIDT_TD_EDC_THRESHOLD, 0xFFFFFFFF, 0, 0xFFFFFFFF }, + { ixDIDT_TCP_EDC_THRESHOLD, 0xFFFFFFFF, 0, 0xFFFFFFFF }, + { ixDIDT_DB_EDC_THRESHOLD, 0xFFFFFFFF, 0, 0xFFFFFFFF }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg SEEDCCtrlResetConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* SQ */ + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_EN_MASK, DIDT_SQ_EDC_CTRL__EDC_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_SW_RST_MASK, DIDT_SQ_EDC_CTRL__EDC_SW_RST__SHIFT, 0x0001 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK, DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL_MASK, DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK, DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS_MASK, DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK, DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_EN_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN_MASK, DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN__SHIFT, 0x0000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg SEEDCCtrlConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* SQ */ + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_EN_MASK, DIDT_SQ_EDC_CTRL__EDC_EN__SHIFT, 0x0001 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_SW_RST_MASK, DIDT_SQ_EDC_CTRL__EDC_SW_RST__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK, DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL_MASK, DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK, DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0004 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS_MASK, DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS__SHIFT, 0x0006 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK, DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_EN_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN__SHIFT, 0x0001 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN_MASK, DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN__SHIFT, 0x0000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg SEEDCCtrlForceStallConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* SQ */ + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_EN_MASK, DIDT_SQ_EDC_CTRL__EDC_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_SW_RST_MASK, DIDT_SQ_EDC_CTRL__EDC_SW_RST__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK, DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL_MASK, DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL__SHIFT, 0x0001 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK, DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0001 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS_MASK, DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS__SHIFT, 0x000C }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK, DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_EN_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN_MASK, DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN__SHIFT, 0x0001 }, + + /* TD */ + { ixDIDT_TD_EDC_CTRL, DIDT_TD_EDC_CTRL__EDC_EN_MASK, DIDT_TD_EDC_CTRL__EDC_EN__SHIFT, 0x0000 }, + { ixDIDT_TD_EDC_CTRL, DIDT_TD_EDC_CTRL__EDC_SW_RST_MASK, DIDT_TD_EDC_CTRL__EDC_SW_RST__SHIFT, 0x0000 }, + { ixDIDT_TD_EDC_CTRL, DIDT_TD_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK, DIDT_TD_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_TD_EDC_CTRL, DIDT_TD_EDC_CTRL__EDC_FORCE_STALL_MASK, DIDT_TD_EDC_CTRL__EDC_FORCE_STALL__SHIFT, 0x0001 }, + { ixDIDT_TD_EDC_CTRL, DIDT_TD_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK, DIDT_TD_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0001 }, + { ixDIDT_TD_EDC_CTRL, DIDT_TD_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS_MASK, DIDT_TD_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS__SHIFT, 0x000E }, + { ixDIDT_TD_EDC_CTRL, DIDT_TD_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK, DIDT_TD_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT, 0x0000 }, + { ixDIDT_TD_EDC_CTRL, DIDT_TD_EDC_CTRL__GC_EDC_EN_MASK, DIDT_TD_EDC_CTRL__GC_EDC_EN__SHIFT, 0x0000 }, + { ixDIDT_TD_EDC_CTRL, DIDT_TD_EDC_CTRL__GC_EDC_STALL_POLICY_MASK, DIDT_TD_EDC_CTRL__GC_EDC_STALL_POLICY__SHIFT, 0x0000 }, + { ixDIDT_TD_EDC_CTRL, DIDT_TD_EDC_CTRL__GC_EDC_LEVEL_COMB_EN_MASK, DIDT_TD_EDC_CTRL__GC_EDC_LEVEL_COMB_EN__SHIFT, 0x0000 }, + { ixDIDT_TD_EDC_CTRL, DIDT_TD_EDC_CTRL__SE_EDC_LEVEL_COMB_EN_MASK, DIDT_TD_EDC_CTRL__SE_EDC_LEVEL_COMB_EN__SHIFT, 0x0001 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg GCDiDtDroopCtrlConfig_vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + { mmGC_DIDT_DROOP_CTRL, GC_DIDT_DROOP_CTRL__DIDT_DROOP_LEVEL_EN_MASK, GC_DIDT_DROOP_CTRL__DIDT_DROOP_LEVEL_EN__SHIFT, 0x0000 }, + { mmGC_DIDT_DROOP_CTRL, GC_DIDT_DROOP_CTRL__DIDT_DROOP_THRESHOLD_MASK, GC_DIDT_DROOP_CTRL__DIDT_DROOP_THRESHOLD__SHIFT, 0x0000 }, + { mmGC_DIDT_DROOP_CTRL, GC_DIDT_DROOP_CTRL__DIDT_DROOP_LEVEL_INDEX_MASK, GC_DIDT_DROOP_CTRL__DIDT_DROOP_LEVEL_INDEX__SHIFT, 0x0000 }, + { mmGC_DIDT_DROOP_CTRL, GC_DIDT_DROOP_CTRL__DIDT_LEVEL_SEL_MASK, GC_DIDT_DROOP_CTRL__DIDT_LEVEL_SEL__SHIFT, 0x0000 }, + { mmGC_DIDT_DROOP_CTRL, GC_DIDT_DROOP_CTRL__DIDT_DROOP_LEVEL_OVERFLOW_MASK, GC_DIDT_DROOP_CTRL__DIDT_DROOP_LEVEL_OVERFLOW__SHIFT, 0x0000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg GCDiDtCtrl0Config_vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + { mmGC_DIDT_CTRL0, GC_DIDT_CTRL0__DIDT_CTRL_EN_MASK, GC_DIDT_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0000 }, + { mmGC_DIDT_CTRL0, GC_DIDT_CTRL0__PHASE_OFFSET_MASK, GC_DIDT_CTRL0__PHASE_OFFSET__SHIFT, 0x0000 }, + { mmGC_DIDT_CTRL0, GC_DIDT_CTRL0__DIDT_SW_RST_MASK, GC_DIDT_CTRL0__DIDT_SW_RST__SHIFT, 0x0000 }, + { mmGC_DIDT_CTRL0, GC_DIDT_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK, GC_DIDT_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { mmGC_DIDT_CTRL0, GC_DIDT_CTRL0__DIDT_TRIGGER_THROTTLE_LOWBIT_MASK, GC_DIDT_CTRL0__DIDT_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0000 }, + { 0xFFFFFFFF } /* End of list */ +}; + + +static const struct vega10_didt_config_reg PSMSEEDCStallPatternConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* SQ EDC STALL PATTERNs */ + { ixDIDT_SQ_EDC_STALL_PATTERN_1_2, DIDT_SQ_EDC_STALL_PATTERN_1_2__EDC_STALL_PATTERN_1_MASK, DIDT_SQ_EDC_STALL_PATTERN_1_2__EDC_STALL_PATTERN_1__SHIFT, 0x0101 }, + { ixDIDT_SQ_EDC_STALL_PATTERN_1_2, DIDT_SQ_EDC_STALL_PATTERN_1_2__EDC_STALL_PATTERN_2_MASK, DIDT_SQ_EDC_STALL_PATTERN_1_2__EDC_STALL_PATTERN_2__SHIFT, 0x0101 }, + { ixDIDT_SQ_EDC_STALL_PATTERN_3_4, DIDT_SQ_EDC_STALL_PATTERN_3_4__EDC_STALL_PATTERN_3_MASK, DIDT_SQ_EDC_STALL_PATTERN_3_4__EDC_STALL_PATTERN_3__SHIFT, 0x1111 }, + { ixDIDT_SQ_EDC_STALL_PATTERN_3_4, DIDT_SQ_EDC_STALL_PATTERN_3_4__EDC_STALL_PATTERN_4_MASK, DIDT_SQ_EDC_STALL_PATTERN_3_4__EDC_STALL_PATTERN_4__SHIFT, 0x1111 }, + + { ixDIDT_SQ_EDC_STALL_PATTERN_5_6, DIDT_SQ_EDC_STALL_PATTERN_5_6__EDC_STALL_PATTERN_5_MASK, DIDT_SQ_EDC_STALL_PATTERN_5_6__EDC_STALL_PATTERN_5__SHIFT, 0x1515 }, + { ixDIDT_SQ_EDC_STALL_PATTERN_5_6, DIDT_SQ_EDC_STALL_PATTERN_5_6__EDC_STALL_PATTERN_6_MASK, DIDT_SQ_EDC_STALL_PATTERN_5_6__EDC_STALL_PATTERN_6__SHIFT, 0x1515 }, + + { ixDIDT_SQ_EDC_STALL_PATTERN_7, DIDT_SQ_EDC_STALL_PATTERN_7__EDC_STALL_PATTERN_7_MASK, DIDT_SQ_EDC_STALL_PATTERN_7__EDC_STALL_PATTERN_7__SHIFT, 0x5555 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg PSMSEEDCStallDelayConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* SQ EDC STALL DELAYs */ + { ixDIDT_SQ_EDC_STALL_DELAY_1, DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ0_MASK, DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ0__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_1, DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ1_MASK, DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ1__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_1, DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ2_MASK, DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ2__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_1, DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ3_MASK, DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ3__SHIFT, 0x0000 }, + + { ixDIDT_SQ_EDC_STALL_DELAY_2, DIDT_SQ_EDC_STALL_DELAY_2__EDC_STALL_DELAY_SQ4_MASK, DIDT_SQ_EDC_STALL_DELAY_2__EDC_STALL_DELAY_SQ4__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_2, DIDT_SQ_EDC_STALL_DELAY_2__EDC_STALL_DELAY_SQ5_MASK, DIDT_SQ_EDC_STALL_DELAY_2__EDC_STALL_DELAY_SQ5__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_2, DIDT_SQ_EDC_STALL_DELAY_2__EDC_STALL_DELAY_SQ6_MASK, DIDT_SQ_EDC_STALL_DELAY_2__EDC_STALL_DELAY_SQ6__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_2, DIDT_SQ_EDC_STALL_DELAY_2__EDC_STALL_DELAY_SQ7_MASK, DIDT_SQ_EDC_STALL_DELAY_2__EDC_STALL_DELAY_SQ7__SHIFT, 0x0000 }, + + { ixDIDT_SQ_EDC_STALL_DELAY_3, DIDT_SQ_EDC_STALL_DELAY_3__EDC_STALL_DELAY_SQ8_MASK, DIDT_SQ_EDC_STALL_DELAY_3__EDC_STALL_DELAY_SQ8__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_3, DIDT_SQ_EDC_STALL_DELAY_3__EDC_STALL_DELAY_SQ9_MASK, DIDT_SQ_EDC_STALL_DELAY_3__EDC_STALL_DELAY_SQ9__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_3, DIDT_SQ_EDC_STALL_DELAY_3__EDC_STALL_DELAY_SQ10_MASK, DIDT_SQ_EDC_STALL_DELAY_3__EDC_STALL_DELAY_SQ10__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_3, DIDT_SQ_EDC_STALL_DELAY_3__EDC_STALL_DELAY_SQ11_MASK, DIDT_SQ_EDC_STALL_DELAY_3__EDC_STALL_DELAY_SQ11__SHIFT, 0x0000 }, + + { ixDIDT_SQ_EDC_STALL_DELAY_4, DIDT_SQ_EDC_STALL_DELAY_4__EDC_STALL_DELAY_SQ12_MASK, DIDT_SQ_EDC_STALL_DELAY_4__EDC_STALL_DELAY_SQ12__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_4, DIDT_SQ_EDC_STALL_DELAY_4__EDC_STALL_DELAY_SQ12_MASK, DIDT_SQ_EDC_STALL_DELAY_4__EDC_STALL_DELAY_SQ13__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_4, DIDT_SQ_EDC_STALL_DELAY_4__EDC_STALL_DELAY_SQ14_MASK, DIDT_SQ_EDC_STALL_DELAY_4__EDC_STALL_DELAY_SQ14__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_STALL_DELAY_4, DIDT_SQ_EDC_STALL_DELAY_4__EDC_STALL_DELAY_SQ15_MASK, DIDT_SQ_EDC_STALL_DELAY_4__EDC_STALL_DELAY_SQ15__SHIFT, 0x0000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg PSMSEEDCThresholdConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* SQ EDC THRESHOLD */ + { ixDIDT_SQ_EDC_THRESHOLD, DIDT_SQ_EDC_THRESHOLD__EDC_THRESHOLD_MASK, DIDT_SQ_EDC_THRESHOLD__EDC_THRESHOLD__SHIFT, 0x0000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg PSMSEEDCCtrlResetConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* SQ EDC CTRL */ + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_EN_MASK, DIDT_SQ_EDC_CTRL__EDC_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_SW_RST_MASK, DIDT_SQ_EDC_CTRL__EDC_SW_RST__SHIFT, 0x0001 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK, DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL_MASK, DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK, DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS_MASK, DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK, DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_EN_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN_MASK, DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN__SHIFT, 0x0000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg PSMSEEDCCtrlConfig_Vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + /* SQ EDC CTRL */ + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_EN_MASK, DIDT_SQ_EDC_CTRL__EDC_EN__SHIFT, 0x0001 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_SW_RST_MASK, DIDT_SQ_EDC_CTRL__EDC_SW_RST__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK, DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL_MASK, DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK, DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS_MASK, DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS__SHIFT, 0x000E }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK, DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT, 0x0000 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_EN_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_EN__SHIFT, 0x0001 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY__SHIFT, 0x0003 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN_MASK, DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN__SHIFT, 0x0001 }, + { ixDIDT_SQ_EDC_CTRL, DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN_MASK, DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN__SHIFT, 0x0000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg PSMGCEDCThresholdConfig_vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + { mmGC_EDC_THRESHOLD, GC_EDC_THRESHOLD__EDC_THRESHOLD_MASK, GC_EDC_THRESHOLD__EDC_THRESHOLD__SHIFT, 0x0000000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg PSMGCEDCDroopCtrlConfig_vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + { mmGC_EDC_DROOP_CTRL, GC_EDC_DROOP_CTRL__EDC_DROOP_LEVEL_EN_MASK, GC_EDC_DROOP_CTRL__EDC_DROOP_LEVEL_EN__SHIFT, 0x0001 }, + { mmGC_EDC_DROOP_CTRL, GC_EDC_DROOP_CTRL__EDC_DROOP_THRESHOLD_MASK, GC_EDC_DROOP_CTRL__EDC_DROOP_THRESHOLD__SHIFT, 0x0384 }, + { mmGC_EDC_DROOP_CTRL, GC_EDC_DROOP_CTRL__EDC_DROOP_LEVEL_INDEX_MASK, GC_EDC_DROOP_CTRL__EDC_DROOP_LEVEL_INDEX__SHIFT, 0x0001 }, + { mmGC_EDC_DROOP_CTRL, GC_EDC_DROOP_CTRL__AVG_PSM_SEL_MASK, GC_EDC_DROOP_CTRL__AVG_PSM_SEL__SHIFT, 0x0001 }, + { mmGC_EDC_DROOP_CTRL, GC_EDC_DROOP_CTRL__EDC_LEVEL_SEL_MASK, GC_EDC_DROOP_CTRL__EDC_LEVEL_SEL__SHIFT, 0x0001 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg PSMGCEDCCtrlResetConfig_vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + { mmGC_EDC_CTRL, GC_EDC_CTRL__EDC_EN_MASK, GC_EDC_CTRL__EDC_EN__SHIFT, 0x0000 }, + { mmGC_EDC_CTRL, GC_EDC_CTRL__EDC_SW_RST_MASK, GC_EDC_CTRL__EDC_SW_RST__SHIFT, 0x0001 }, + { mmGC_EDC_CTRL, GC_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK, GC_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { mmGC_EDC_CTRL, GC_EDC_CTRL__EDC_FORCE_STALL_MASK, GC_EDC_CTRL__EDC_FORCE_STALL__SHIFT, 0x0000 }, + { mmGC_EDC_CTRL, GC_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK, GC_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0000 }, + { mmGC_EDC_CTRL, GC_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK, GC_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT, 0x0000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg PSMGCEDCCtrlConfig_vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + { mmGC_EDC_CTRL, GC_EDC_CTRL__EDC_EN_MASK, GC_EDC_CTRL__EDC_EN__SHIFT, 0x0001 }, + { mmGC_EDC_CTRL, GC_EDC_CTRL__EDC_SW_RST_MASK, GC_EDC_CTRL__EDC_SW_RST__SHIFT, 0x0000 }, + { mmGC_EDC_CTRL, GC_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK, GC_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT, 0x0000 }, + { mmGC_EDC_CTRL, GC_EDC_CTRL__EDC_FORCE_STALL_MASK, GC_EDC_CTRL__EDC_FORCE_STALL__SHIFT, 0x0000 }, + { mmGC_EDC_CTRL, GC_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK, GC_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT, 0x0000 }, + { mmGC_EDC_CTRL, GC_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK, GC_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT, 0x0000 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg AvfsPSMResetConfig_vega10[]= +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + { 0x16A02, 0xFFFFFFFF, 0x0, 0x0000005F }, + { 0x16A05, 0xFFFFFFFF, 0x0, 0x00000001 }, + { 0x16A06, 0x00000001, 0x0, 0x02000000 }, + { 0x16A01, 0xFFFFFFFF, 0x0, 0x00003027 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static const struct vega10_didt_config_reg AvfsPSMInitConfig_vega10[] = +{ +/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + * Offset Mask Shift Value + * --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + */ + { 0x16A05, 0xFFFFFFFF, 0x18, 0x00000001 }, + { 0x16A05, 0xFFFFFFFF, 0x8, 0x00000003 }, + { 0x16A05, 0xFFFFFFFF, 0xa, 0x00000006 }, + { 0x16A05, 0xFFFFFFFF, 0x7, 0x00000000 }, + { 0x16A06, 0xFFFFFFFF, 0x18, 0x00000001 }, + { 0x16A06, 0xFFFFFFFF, 0x19, 0x00000001 }, + { 0x16A01, 0xFFFFFFFF, 0x0, 0x00003027 }, + + { 0xFFFFFFFF } /* End of list */ +}; + +static int vega10_program_didt_config_registers(struct pp_hwmgr *hwmgr, const struct vega10_didt_config_reg *config_regs, enum vega10_didt_config_reg_type reg_type) +{ + uint32_t data; + + PP_ASSERT_WITH_CODE((config_regs != NULL), "[vega10_program_didt_config_registers] Invalid config register table!", return -EINVAL); + + while (config_regs->offset != 0xFFFFFFFF) { + switch (reg_type) { + case VEGA10_CONFIGREG_DIDT: + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, config_regs->offset); + data &= ~config_regs->mask; + data |= ((config_regs->value << config_regs->shift) & config_regs->mask); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, config_regs->offset, data); + break; + case VEGA10_CONFIGREG_GCCAC: + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG_GC_CAC, config_regs->offset); + data &= ~config_regs->mask; + data |= ((config_regs->value << config_regs->shift) & config_regs->mask); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG_GC_CAC, config_regs->offset, data); + break; + case VEGA10_CONFIGREG_SECAC: + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG_SE_CAC, config_regs->offset); + data &= ~config_regs->mask; + data |= ((config_regs->value << config_regs->shift) & config_regs->mask); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG_SE_CAC, config_regs->offset, data); + break; + default: + return -EINVAL; + } + + config_regs++; + } + + return 0; +} + +static int vega10_program_gc_didt_config_registers(struct pp_hwmgr *hwmgr, const struct vega10_didt_config_reg *config_regs) +{ + uint32_t data; + + while (config_regs->offset != 0xFFFFFFFF) { + data = cgs_read_register(hwmgr->device, config_regs->offset); + data &= ~config_regs->mask; + data |= ((config_regs->value << config_regs->shift) & config_regs->mask); + cgs_write_register(hwmgr->device, config_regs->offset, data); + config_regs++; + } + + return 0; +} + +static void vega10_didt_set_mask(struct pp_hwmgr *hwmgr, const bool enable) +{ + uint32_t data; + int result; + uint32_t en = (enable ? 1 : 0); + uint32_t didt_block_info = SQ_IR_MASK | TCP_IR_MASK | TD_PCC_MASK; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SQRamping)) { + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_CTRL0); + data &= ~DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK; + data |= ((en << DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_CTRL0, data); + didt_block_info &= ~SQ_Enable_MASK; + didt_block_info |= en << SQ_Enable_SHIFT; + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DBRamping)) { + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_CTRL0); + data &= ~DIDT_DB_CTRL0__DIDT_CTRL_EN_MASK; + data |= ((en << DIDT_DB_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_DB_CTRL0__DIDT_CTRL_EN_MASK); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_CTRL0, data); + didt_block_info &= ~DB_Enable_MASK; + didt_block_info |= en << DB_Enable_SHIFT; + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TDRamping)) { + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_CTRL0); + data &= ~DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK; + data |= ((en << DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_CTRL0, data); + didt_block_info &= ~TD_Enable_MASK; + didt_block_info |= en << TD_Enable_SHIFT; + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TCPRamping)) { + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_CTRL0); + data &= ~DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK; + data |= ((en << DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_CTRL0, data); + didt_block_info &= ~TCP_Enable_MASK; + didt_block_info |= en << TCP_Enable_SHIFT; + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DBRRamping)) { + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_CTRL0); + data &= ~DIDT_DBR_CTRL0__DIDT_CTRL_EN_MASK; + data |= ((en << DIDT_DBR_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_DBR_CTRL0__DIDT_CTRL_EN_MASK); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_CTRL0, data); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DiDtEDCEnable)) { + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SQRamping)) { + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_EDC_CTRL); + data &= ~DIDT_SQ_EDC_CTRL__EDC_EN_MASK; + data |= ((en << DIDT_SQ_EDC_CTRL__EDC_EN__SHIFT) & DIDT_SQ_EDC_CTRL__EDC_EN_MASK); + data &= ~DIDT_SQ_EDC_CTRL__EDC_SW_RST_MASK; + data |= ((~en << DIDT_SQ_EDC_CTRL__EDC_SW_RST__SHIFT) & DIDT_SQ_EDC_CTRL__EDC_SW_RST_MASK); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_EDC_CTRL, data); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DBRamping)) { + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_EDC_CTRL); + data &= ~DIDT_DB_EDC_CTRL__EDC_EN_MASK; + data |= ((en << DIDT_DB_EDC_CTRL__EDC_EN__SHIFT) & DIDT_DB_EDC_CTRL__EDC_EN_MASK); + data &= ~DIDT_DB_EDC_CTRL__EDC_SW_RST_MASK; + data |= ((~en << DIDT_DB_EDC_CTRL__EDC_SW_RST__SHIFT) & DIDT_DB_EDC_CTRL__EDC_SW_RST_MASK); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_EDC_CTRL, data); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TDRamping)) { + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_EDC_CTRL); + data &= ~DIDT_TD_EDC_CTRL__EDC_EN_MASK; + data |= ((en << DIDT_TD_EDC_CTRL__EDC_EN__SHIFT) & DIDT_TD_EDC_CTRL__EDC_EN_MASK); + data &= ~DIDT_TD_EDC_CTRL__EDC_SW_RST_MASK; + data |= ((~en << DIDT_TD_EDC_CTRL__EDC_SW_RST__SHIFT) & DIDT_TD_EDC_CTRL__EDC_SW_RST_MASK); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_EDC_CTRL, data); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TCPRamping)) { + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_EDC_CTRL); + data &= ~DIDT_TCP_EDC_CTRL__EDC_EN_MASK; + data |= ((en << DIDT_TCP_EDC_CTRL__EDC_EN__SHIFT) & DIDT_TCP_EDC_CTRL__EDC_EN_MASK); + data &= ~DIDT_TCP_EDC_CTRL__EDC_SW_RST_MASK; + data |= ((~en << DIDT_TCP_EDC_CTRL__EDC_SW_RST__SHIFT) & DIDT_TCP_EDC_CTRL__EDC_SW_RST_MASK); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_EDC_CTRL, data); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DBRRamping)) { + data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_EDC_CTRL); + data &= ~DIDT_DBR_EDC_CTRL__EDC_EN_MASK; + data |= ((en << DIDT_DBR_EDC_CTRL__EDC_EN__SHIFT) & DIDT_DBR_EDC_CTRL__EDC_EN_MASK); + data &= ~DIDT_DBR_EDC_CTRL__EDC_SW_RST_MASK; + data |= ((~en << DIDT_DBR_EDC_CTRL__EDC_SW_RST__SHIFT) & DIDT_DBR_EDC_CTRL__EDC_SW_RST_MASK); + cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_EDC_CTRL, data); + } + } + + if (enable) { + /* For Vega10, SMC does not support any mask yet. */ + result = smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, PPSMC_MSG_ConfigureGfxDidt, didt_block_info); + PP_ASSERT((0 == result), "[EnableDiDtConfig] SMC Configure Gfx Didt Failed!"); + } +} + +static int vega10_enable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) +{ + int result; + uint32_t num_se = 0, count, data; + struct cgs_system_info sys_info = {0}; + uint32_t reg; + + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_GFX_SE_INFO; + if (cgs_query_system_info(hwmgr->device, &sys_info) == 0) + num_se = sys_info.value; + + cgs_enter_safe_mode(hwmgr->device, true); + + cgs_lock_grbm_idx(hwmgr->device, true); + reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + for (count = 0; count < num_se; count++) { + data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); + cgs_write_register(hwmgr->device, reg, data); + + result = vega10_program_didt_config_registers(hwmgr, SEDiDtStallCtrlConfig_vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEDiDtStallPatternConfig_vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEDiDtWeightConfig_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEDiDtCtrl1Config_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEDiDtCtrl2Config_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEDiDtCtrl3Config_vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEDiDtTuningCtrlConfig_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SELCacConfig_Vega10, VEGA10_CONFIGREG_SECAC); + result |= vega10_program_didt_config_registers(hwmgr, SEDiDtCtrl0Config_Vega10, VEGA10_CONFIGREG_DIDT); + + if (0 != result) + break; + } + cgs_write_register(hwmgr->device, reg, 0xE0000000); + cgs_lock_grbm_idx(hwmgr->device, false); + + vega10_didt_set_mask(hwmgr, true); + + cgs_enter_safe_mode(hwmgr->device, false); + + return 0; +} + +static int vega10_disable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) +{ + cgs_enter_safe_mode(hwmgr->device, true); + + vega10_didt_set_mask(hwmgr, false); + + cgs_enter_safe_mode(hwmgr->device, false); + + return 0; +} + +static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) +{ + int result; + uint32_t num_se = 0, count, data; + struct cgs_system_info sys_info = {0}; + uint32_t reg; + + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_GFX_SE_INFO; + if (cgs_query_system_info(hwmgr->device, &sys_info) == 0) + num_se = sys_info.value; + + cgs_enter_safe_mode(hwmgr->device, true); + + cgs_lock_grbm_idx(hwmgr->device, true); + reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + for (count = 0; count < num_se; count++) { + data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); + cgs_write_register(hwmgr->device, reg, data); + + result = vega10_program_didt_config_registers(hwmgr, SEDiDtStallCtrlConfig_vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEDiDtStallPatternConfig_vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEDiDtCtrl3Config_vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEDiDtCtrl0Config_Vega10, VEGA10_CONFIGREG_DIDT); + if (0 != result) + break; + } + cgs_write_register(hwmgr->device, reg, 0xE0000000); + cgs_lock_grbm_idx(hwmgr->device, false); + + vega10_didt_set_mask(hwmgr, true); + + cgs_enter_safe_mode(hwmgr->device, false); + + vega10_program_gc_didt_config_registers(hwmgr, GCDiDtDroopCtrlConfig_vega10); + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_GCEDC)) + vega10_program_gc_didt_config_registers(hwmgr, GCDiDtCtrl0Config_vega10); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PSM)) + vega10_program_gc_didt_config_registers(hwmgr, AvfsPSMInitConfig_vega10); + + return 0; +} + +static int vega10_disable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) +{ + uint32_t data; + + cgs_enter_safe_mode(hwmgr->device, true); + + vega10_didt_set_mask(hwmgr, false); + + cgs_enter_safe_mode(hwmgr->device, false); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_GCEDC)) { + data = 0x00000000; + cgs_write_register(hwmgr->device, mmGC_DIDT_CTRL0, data); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PSM)) + vega10_program_gc_didt_config_registers(hwmgr, AvfsPSMResetConfig_vega10); + + return 0; +} + +static int vega10_enable_se_edc_config(struct pp_hwmgr *hwmgr) +{ + int result; + uint32_t num_se = 0, count, data; + struct cgs_system_info sys_info = {0}; + uint32_t reg; + + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_GFX_SE_INFO; + if (cgs_query_system_info(hwmgr->device, &sys_info) == 0) + num_se = sys_info.value; + + cgs_enter_safe_mode(hwmgr->device, true); + + cgs_lock_grbm_idx(hwmgr->device, true); + reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + for (count = 0; count < num_se; count++) { + data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); + cgs_write_register(hwmgr->device, reg, data); + result = vega10_program_didt_config_registers(hwmgr, SEDiDtWeightConfig_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEEDCStallDelayConfig_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEEDCThresholdConfig_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEEDCCtrlResetConfig_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEEDCCtrlConfig_Vega10, VEGA10_CONFIGREG_DIDT); + + if (0 != result) + break; + } + cgs_write_register(hwmgr->device, reg, 0xE0000000); + cgs_lock_grbm_idx(hwmgr->device, false); + + vega10_didt_set_mask(hwmgr, true); + + cgs_enter_safe_mode(hwmgr->device, false); + + return 0; +} + +static int vega10_disable_se_edc_config(struct pp_hwmgr *hwmgr) +{ + cgs_enter_safe_mode(hwmgr->device, true); + + vega10_didt_set_mask(hwmgr, false); + + cgs_enter_safe_mode(hwmgr->device, false); + + return 0; +} + +static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) +{ + int result; + uint32_t num_se = 0; + uint32_t count, data; + struct cgs_system_info sys_info = {0}; + uint32_t reg; + + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_GFX_SE_INFO; + if (cgs_query_system_info(hwmgr->device, &sys_info) == 0) + num_se = sys_info.value; + + cgs_enter_safe_mode(hwmgr->device, true); + + vega10_program_gc_didt_config_registers(hwmgr, AvfsPSMResetConfig_vega10); + + cgs_lock_grbm_idx(hwmgr->device, true); + reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + for (count = 0; count < num_se; count++) { + data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT); + cgs_write_register(hwmgr->device, reg, data); + result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallDelayConfig_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCCtrlResetConfig_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCCtrlConfig_Vega10, VEGA10_CONFIGREG_DIDT); + + if (0 != result) + break; + } + cgs_write_register(hwmgr->device, reg, 0xE0000000); + cgs_lock_grbm_idx(hwmgr->device, false); + + vega10_didt_set_mask(hwmgr, true); + + cgs_enter_safe_mode(hwmgr->device, false); + + vega10_program_gc_didt_config_registers(hwmgr, PSMGCEDCDroopCtrlConfig_vega10); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_GCEDC)) { + vega10_program_gc_didt_config_registers(hwmgr, PSMGCEDCCtrlResetConfig_vega10); + vega10_program_gc_didt_config_registers(hwmgr, PSMGCEDCCtrlConfig_vega10); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PSM)) + vega10_program_gc_didt_config_registers(hwmgr, AvfsPSMInitConfig_vega10); + + return 0; +} + +static int vega10_disable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) +{ + uint32_t data; + + cgs_enter_safe_mode(hwmgr->device, true); + + vega10_didt_set_mask(hwmgr, false); + + cgs_enter_safe_mode(hwmgr->device, false); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_GCEDC)) { + data = 0x00000000; + cgs_write_register(hwmgr->device, mmGC_EDC_CTRL, data); + } + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PSM)) + vega10_program_gc_didt_config_registers(hwmgr, AvfsPSMResetConfig_vega10); + + return 0; +} + +static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) +{ + uint32_t reg; + int result; + + cgs_enter_safe_mode(hwmgr->device, true); + + cgs_lock_grbm_idx(hwmgr->device, true); + reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX); + cgs_write_register(hwmgr->device, reg, 0xE0000000); + cgs_lock_grbm_idx(hwmgr->device, false); + + result = vega10_program_didt_config_registers(hwmgr, SEEDCForceStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); + result |= vega10_program_didt_config_registers(hwmgr, SEEDCCtrlForceStallConfig_Vega10, VEGA10_CONFIGREG_DIDT); + if (0 != result) + return result; + + vega10_didt_set_mask(hwmgr, false); + + cgs_enter_safe_mode(hwmgr->device, false); + + return 0; +} + +static int vega10_disable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) +{ + int result; + + result = vega10_disable_se_edc_config(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDtConfig] Pre DIDT disable clock gating failed!", return result); + + return 0; +} + +int vega10_enable_didt_config(struct pp_hwmgr *hwmgr) +{ + int result = 0; + struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); + + if (data->smu_features[GNLD_DIDT].supported) { + if (data->smu_features[GNLD_DIDT].enabled) + PP_DBG_LOG("[EnableDiDtConfig] Feature DiDt Already enabled!\n"); + + switch (data->registry_data.didt_mode) { + case 0: + result = vega10_enable_cac_driving_se_didt_config(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), "[EnableDiDt] Attempt to enable DiDt Mode 0 Failed!", return result); + break; + case 2: + result = vega10_enable_psm_gc_didt_config(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), "[EnableDiDt] Attempt to enable DiDt Mode 2 Failed!", return result); + break; + case 3: + result = vega10_enable_se_edc_config(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), "[EnableDiDt] Attempt to enable DiDt Mode 3 Failed!", return result); + break; + case 1: + case 4: + case 5: + result = vega10_enable_psm_gc_edc_config(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), "[EnableDiDt] Attempt to enable DiDt Mode 5 Failed!", return result); + break; + case 6: + result = vega10_enable_se_edc_force_stall_config(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), "[EnableDiDt] Attempt to enable DiDt Mode 6 Failed!", return result); + break; + default: + result = -EINVAL; + break; + } + + if (0 == result) { + PP_ASSERT_WITH_CODE((!vega10_enable_smc_features(hwmgr->smumgr, true, data->smu_features[GNLD_DIDT].smu_feature_bitmap)), + "[EnableDiDtConfig] Attempt to Enable DiDt feature Failed!", return result); + data->smu_features[GNLD_DIDT].enabled = true; + } + } + + return result; +} + +int vega10_disable_didt_config(struct pp_hwmgr *hwmgr) +{ + int result = 0; + struct vega10_hwmgr *data = (struct vega10_hwmgr *)(hwmgr->backend); + + if (data->smu_features[GNLD_DIDT].supported) { + if (!data->smu_features[GNLD_DIDT].enabled) + PP_DBG_LOG("[DisableDiDtConfig] Feature DiDt Already Disabled!\n"); + + switch (data->registry_data.didt_mode) { + case 0: + result = vega10_disable_cac_driving_se_didt_config(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDt] Attempt to disable DiDt Mode 0 Failed!", return result); + break; + case 2: + result = vega10_disable_psm_gc_didt_config(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDt] Attempt to disable DiDt Mode 2 Failed!", return result); + break; + case 3: + result = vega10_disable_se_edc_config(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDt] Attempt to disable DiDt Mode 3 Failed!", return result); + break; + case 1: + case 4: + case 5: + result = vega10_disable_psm_gc_edc_config(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDt] Attempt to disable DiDt Mode 5 Failed!", return result); + break; + case 6: + result = vega10_disable_se_edc_force_stall_config(hwmgr); + PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDt] Attempt to disable DiDt Mode 6 Failed!", return result); + break; + default: + result = -EINVAL; + break; + } + + if (0 == result) { + PP_ASSERT_WITH_CODE((0 != vega10_enable_smc_features(hwmgr->smumgr, false, data->smu_features[GNLD_DIDT].smu_feature_bitmap)), + "[DisableDiDtConfig] Attempt to Disable DiDt feature Failed!", return result); + data->smu_features[GNLD_DIDT].enabled = false; + } + } + + return result; +} void vega10_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr) { diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h index 9ecaa27c0bb547e1c72707c4a562f19123b7ff76..b95771ab89cd19130f6ecd6d51025a3ef53b5340 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.h @@ -31,6 +31,12 @@ enum vega10_pt_config_reg_type { VEGA10_CONFIGREG_MAX }; +enum vega10_didt_config_reg_type { + VEGA10_CONFIGREG_DIDT = 0, + VEGA10_CONFIGREG_GCCAC, + VEGA10_CONFIGREG_SECAC +}; + /* PowerContainment Features */ #define POWERCONTAINMENT_FEATURE_DTE 0x00000001 #define POWERCONTAINMENT_FEATURE_TDCLimit 0x00000002 @@ -44,6 +50,13 @@ struct vega10_pt_config_reg { enum vega10_pt_config_reg_type type; }; +struct vega10_didt_config_reg { + uint32_t offset; + uint32_t mask; + uint32_t shift; + uint32_t value; +}; + struct vega10_pt_defaults { uint8_t SviLoadLineEn; uint8_t SviLoadLineVddC; @@ -62,5 +75,8 @@ int vega10_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n); int vega10_power_control_set_level(struct pp_hwmgr *hwmgr); int vega10_disable_power_containment(struct pp_hwmgr *hwmgr); +int vega10_enable_didt_config(struct pp_hwmgr *hwmgr); +int vega10_disable_didt_config(struct pp_hwmgr *hwmgr); + #endif /* _VEGA10_POWERTUNE_H_ */ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c index 1623644ea49acef8f8c4053b396fd26e8bd703f8..e343df1903754fc19e0f2ac082f8b8369c5f1201 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c @@ -31,6 +31,8 @@ #include "cgs_common.h" #include "vega10_pptable.h" +#define NUM_DSPCLK_LEVELS 8 + static void set_hw_cap(struct pp_hwmgr *hwmgr, bool enable, enum phm_platform_caps cap) { @@ -644,11 +646,11 @@ static int get_gfxclk_voltage_dependency_table( return 0; } -static int get_dcefclk_voltage_dependency_table( +static int get_pix_clk_voltage_dependency_table( struct pp_hwmgr *hwmgr, struct phm_ppt_v1_clock_voltage_dependency_table **pp_vega10_clk_dep_table, - const ATOM_Vega10_DCEFCLK_Dependency_Table *clk_dep_table) + const ATOM_Vega10_PIXCLK_Dependency_Table *clk_dep_table) { uint32_t table_size, i; struct phm_ppt_v1_clock_voltage_dependency_table @@ -681,6 +683,76 @@ static int get_dcefclk_voltage_dependency_table( return 0; } +static int get_dcefclk_voltage_dependency_table( + struct pp_hwmgr *hwmgr, + struct phm_ppt_v1_clock_voltage_dependency_table + **pp_vega10_clk_dep_table, + const ATOM_Vega10_DCEFCLK_Dependency_Table *clk_dep_table) +{ + uint32_t table_size, i; + uint8_t num_entries; + struct phm_ppt_v1_clock_voltage_dependency_table + *clk_table; + struct cgs_system_info sys_info = {0}; + uint32_t dev_id; + uint32_t rev_id; + + PP_ASSERT_WITH_CODE((clk_dep_table->ucNumEntries != 0), + "Invalid PowerPlay Table!", return -1); + +/* + * workaround needed to add another DPM level for pioneer cards + * as VBIOS is locked down. + * This DPM level was added to support 3DPM monitors @ 4K120Hz + * + */ + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_PCIE_DEV; + cgs_query_system_info(hwmgr->device, &sys_info); + dev_id = (uint32_t)sys_info.value; + + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_PCIE_REV; + cgs_query_system_info(hwmgr->device, &sys_info); + rev_id = (uint32_t)sys_info.value; + + if (dev_id == 0x6863 && rev_id == 0 && + clk_dep_table->entries[clk_dep_table->ucNumEntries - 1].ulClk < 90000) + num_entries = clk_dep_table->ucNumEntries + 1 > NUM_DSPCLK_LEVELS ? + NUM_DSPCLK_LEVELS : clk_dep_table->ucNumEntries + 1; + else + num_entries = clk_dep_table->ucNumEntries; + + + table_size = sizeof(uint32_t) + + sizeof(phm_ppt_v1_clock_voltage_dependency_record) * + num_entries; + + clk_table = (struct phm_ppt_v1_clock_voltage_dependency_table *) + kzalloc(table_size, GFP_KERNEL); + + if (!clk_table) + return -ENOMEM; + + clk_table->count = (uint32_t)num_entries; + + for (i = 0; i < clk_dep_table->ucNumEntries; i++) { + clk_table->entries[i].vddInd = + clk_dep_table->entries[i].ucVddInd; + clk_table->entries[i].clk = + le32_to_cpu(clk_dep_table->entries[i].ulClk); + } + + if (i < num_entries) { + clk_table->entries[i].vddInd = clk_dep_table->entries[i-1].ucVddInd; + clk_table->entries[i].clk = 90000; + } + + *pp_vega10_clk_dep_table = clk_table; + + return 0; +} + static int get_pcie_table(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_pcie_table **vega10_pcie_table, const Vega10_PPTable_Generic_SubTable_Header *table) @@ -862,21 +934,21 @@ static int init_powerplay_extended_tables( gfxclk_dep_table); if (!result && powerplay_table->usPixclkDependencyTableOffset) - result = get_dcefclk_voltage_dependency_table(hwmgr, + result = get_pix_clk_voltage_dependency_table(hwmgr, &pp_table_info->vdd_dep_on_pixclk, - (const ATOM_Vega10_DCEFCLK_Dependency_Table*) + (const ATOM_Vega10_PIXCLK_Dependency_Table*) pixclk_dep_table); if (!result && powerplay_table->usPhyClkDependencyTableOffset) - result = get_dcefclk_voltage_dependency_table(hwmgr, + result = get_pix_clk_voltage_dependency_table(hwmgr, &pp_table_info->vdd_dep_on_phyclk, - (const ATOM_Vega10_DCEFCLK_Dependency_Table *) + (const ATOM_Vega10_PIXCLK_Dependency_Table *) phyclk_dep_table); if (!result && powerplay_table->usDispClkDependencyTableOffset) - result = get_dcefclk_voltage_dependency_table(hwmgr, + result = get_pix_clk_voltage_dependency_table(hwmgr, &pp_table_info->vdd_dep_on_dispclk, - (const ATOM_Vega10_DCEFCLK_Dependency_Table *) + (const ATOM_Vega10_PIXCLK_Dependency_Table *) dispclk_dep_table); if (!result && powerplay_table->usDcefclkDependencyTableOffset) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c index e7ab8eb8a0cfae1c9b60c582a4cbee4b5b49e567..d44243441d284a80bc5abe398bf1bb9d0f3f875c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c @@ -321,10 +321,7 @@ int vega10_fan_ctrl_reset_fan_speed_to_default(struct pp_hwmgr *hwmgr) if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl)) { - result = vega10_fan_ctrl_set_static_mode(hwmgr, - FDO_PWM_MODE_STATIC); - if (!result) - result = vega10_fan_ctrl_start_smc_fan_control(hwmgr); + result = vega10_fan_ctrl_start_smc_fan_control(hwmgr); } else result = vega10_fan_ctrl_set_default_mode(hwmgr); @@ -633,7 +630,6 @@ int tf_vega10_thermal_start_smc_fan_control(struct pp_hwmgr *hwmgr, if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_MicrocodeFanControl)) { vega10_fan_ctrl_start_smc_fan_control(hwmgr); - vega10_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC); } return 0; diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h index a1ebe1014492058e07658b512ec4ac3bc6db2c1a..a4c8b09b6f1464eb92950dfbe12c838fa5bdf34f 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h @@ -164,9 +164,14 @@ enum phm_platform_caps { PHM_PlatformCaps_EnablePlatformPowerManagement, /* indicates that Platform Power Management feature is supported */ PHM_PlatformCaps_SurpriseRemoval, /* indicates that surprise removal feature is requested */ PHM_PlatformCaps_NewCACVoltage, /* indicates new CAC voltage table support */ + PHM_PlatformCaps_DiDtSupport, /* for dI/dT feature */ PHM_PlatformCaps_DBRamping, /* for dI/dT feature */ PHM_PlatformCaps_TDRamping, /* for dI/dT feature */ PHM_PlatformCaps_TCPRamping, /* for dI/dT feature */ + PHM_PlatformCaps_DBRRamping, /* for dI/dT feature */ + PHM_PlatformCaps_DiDtEDCEnable, /* for dI/dT feature */ + PHM_PlatformCaps_GCEDC, /* for dI/dT feature */ + PHM_PlatformCaps_PSM, /* for dI/dT feature */ PHM_PlatformCaps_EnableSMU7ThermalManagement, /* SMC will manage thermal events */ PHM_PlatformCaps_FPS, /* FPS support */ PHM_PlatformCaps_ACP, /* ACP support */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 47e57bd2c36f9b747c4a7632a1d488efcd13e40e..91b0105e82403c448cee8a6c303828f392e87eb8 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -128,6 +128,8 @@ struct phm_uvd_arbiter { uint32_t dclk; uint32_t vclk_ceiling; uint32_t dclk_ceiling; + uint32_t vclk_soft_min; + uint32_t dclk_soft_min; }; struct phm_vce_arbiter { diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_debug.h b/drivers/gpu/drm/amd/powerplay/inc/pp_debug.h index f3f9ebb631a5f3383142956e1793ebd8d0936500..822cd8b5bf90d1982c860e42a44e440b9dc69c5f 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_debug.h +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_debug.h @@ -42,6 +42,12 @@ } \ } while (0) +#define PP_ASSERT(cond, msg) \ + do { \ + if (!(cond)) { \ + pr_warn("%s\n", msg); \ + } \ + } while (0) #define PP_DBG_LOG(fmt, ...) \ do { \ diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h b/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h index 227d999b6bd11ca5be538a87cfaf471f6d81a871..a511611ec7e0c0190fde321ba9e6cbc9e236bf9c 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h +++ b/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h @@ -41,6 +41,8 @@ inline static uint32_t soc15_get_register_offset( reg = MP1_BASE.instance[inst].segment[segment] + offset; else if (hw_id == DF_HWID) reg = DF_BASE.instance[inst].segment[segment] + offset; + else if (hw_id == GC_HWID) + reg = GC_BASE.instance[inst].segment[segment] + offset; return reg; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h index e0e106f1b23a0de38a8222dc9d220afb04d549e2..901c960cfe21999787ea9d1b46dee00e5872c2da 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h @@ -66,7 +66,12 @@ #define PPSMC_MSG_SetMinVddcrSocVoltage 0x22 #define PPSMC_MSG_SetMinVideoFclkFreq 0x23 #define PPSMC_MSG_SetMinDeepSleepDcefclk 0x24 -#define PPSMC_Message_Count 0x25 +#define PPSMC_MSG_ForcePowerDownGfx 0x25 +#define PPSMC_MSG_SetPhyclkVoltageByFreq 0x26 +#define PPSMC_MSG_SetDppclkVoltageByFreq 0x27 +#define PPSMC_MSG_SetSoftMinVcn 0x28 +#define PPSMC_Message_Count 0x29 + typedef uint16_t PPSMC_Result; typedef int PPSMC_Msg; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu9.h b/drivers/gpu/drm/amd/powerplay/inc/smu9.h index 9ef2490c7c2e009f9a613abbc953b463baba1af7..550ed675027ad79bebb5261a434beeeae8b316ae 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu9.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu9.h @@ -55,9 +55,9 @@ #define FEATURE_FW_CTF_BIT 23 #define FEATURE_LED_DISPLAY_BIT 24 #define FEATURE_FAN_CONTROL_BIT 25 -#define FEATURE_VOLTAGE_CONTROLLER_BIT 26 -#define FEATURE_SPARE_27_BIT 27 -#define FEATURE_SPARE_28_BIT 28 +#define FEATURE_FAST_PPT_BIT 26 +#define FEATURE_GFX_EDC_BIT 27 +#define FEATURE_ACG_BIT 28 #define FEATURE_SPARE_29_BIT 29 #define FEATURE_SPARE_30_BIT 30 #define FEATURE_SPARE_31_BIT 31 @@ -90,9 +90,10 @@ #define FFEATURE_FW_CTF_MASK (1 << FEATURE_FW_CTF_BIT ) #define FFEATURE_LED_DISPLAY_MASK (1 << FEATURE_LED_DISPLAY_BIT ) #define FFEATURE_FAN_CONTROL_MASK (1 << FEATURE_FAN_CONTROL_BIT ) -#define FFEATURE_VOLTAGE_CONTROLLER_MASK (1 << FEATURE_VOLTAGE_CONTROLLER_BIT ) -#define FFEATURE_SPARE_27_MASK (1 << FEATURE_SPARE_27_BIT ) -#define FFEATURE_SPARE_28_MASK (1 << FEATURE_SPARE_28_BIT ) + +#define FEATURE_FAST_PPT_MASK (1 << FAST_PPT_BIT ) +#define FEATURE_GFX_EDC_MASK (1 << FEATURE_GFX_EDC_BIT ) +#define FEATURE_ACG_MASK (1 << FEATURE_ACG_BIT ) #define FFEATURE_SPARE_29_MASK (1 << FEATURE_SPARE_29_BIT ) #define FFEATURE_SPARE_30_MASK (1 << FEATURE_SPARE_30_BIT ) #define FFEATURE_SPARE_31_MASK (1 << FEATURE_SPARE_31_BIT ) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h index 532186b6f941f4cc1e3256f7ad6ad392ced4e020..f6d6c61f796a463abfd8dfb2be0129c6c2579740 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h @@ -312,7 +312,10 @@ typedef struct { PllSetting_t GfxBoostState; - uint32_t Reserved[14]; + uint8_t AcgEnable[NUM_GFXCLK_DPM_LEVELS]; + GbVdroopTable_t AcgBtcGbVdroopTable; + QuadraticInt_t AcgAvfsGb; + uint32_t Reserved[4]; /* Padding - ignore */ uint32_t MmHubPadding[7]; /* SMU internal use */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h index 976e942ec69433f10c32dd29aee4617974a349c3..5d61cc9d45544ad817702d3c777f10d31557eeaf 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h @@ -131,6 +131,7 @@ struct pp_smumgr_func { bool (*is_dpm_running)(struct pp_hwmgr *hwmgr); int (*populate_requested_graphic_levels)(struct pp_hwmgr *hwmgr, struct amd_pp_profile *request); + bool (*is_hw_avfs_present)(struct pp_smumgr *smumgr); }; struct pp_smumgr { @@ -202,6 +203,8 @@ extern bool smum_is_dpm_running(struct pp_hwmgr *hwmgr); extern int smum_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, struct amd_pp_profile *request); +extern bool smum_is_hw_avfs_present(struct pp_smumgr *smumgr); + #define SMUM_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT #define SMUM_FIELD_MASK(reg, field) reg##__##field##_MASK diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h index b4af9e85dfa532e7db728253cbd00a2de9a1c3b3..cb070ebc7de196f185415c6c02afd27c66e24a6d 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h @@ -124,6 +124,10 @@ typedef uint16_t PPSMC_Result; #define PPSMC_MSG_NumOfDisplays 0x56 #define PPSMC_MSG_ReadSerialNumTop32 0x58 #define PPSMC_MSG_ReadSerialNumBottom32 0x59 +#define PPSMC_MSG_RunAcgBtc 0x5C +#define PPSMC_MSG_RunAcgInClosedLoop 0x5D +#define PPSMC_MSG_RunAcgInOpenLoop 0x5E +#define PPSMC_MSG_InitializeAcg 0x5F #define PPSMC_MSG_GetCurrPkgPwr 0x61 #define PPSMC_Message_Count 0x62 diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c index 6a320b27aefd17ce96cee015d8e2db82b1dacd91..8712f093d6d90afcc3b64274c71424d55ec2af61 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.c @@ -2129,6 +2129,25 @@ int fiji_thermal_setup_fan_table(struct pp_hwmgr *hwmgr) return 0; } + +int fiji_thermal_avfs_enable(struct pp_hwmgr *hwmgr) +{ + int ret; + struct pp_smumgr *smumgr = (struct pp_smumgr *)(hwmgr->smumgr); + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(smumgr->backend); + + if (smu_data->avfs.avfs_btc_status != AVFS_BTC_ENABLEAVFS) + return 0; + + ret = smum_send_msg_to_smc(smumgr, PPSMC_MSG_EnableAvfs); + + if (!ret) + /* If this param is not changed, this function could fire unnecessarily */ + smu_data->avfs.avfs_btc_status = AVFS_BTC_COMPLETED_PREVIOUSLY; + + return ret; +} + static int fiji_program_mem_timing_parameters(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.h b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.h index 0e9e1f2d7238da3741324fd478630f6a700344c7..d9c72d992e302777521a7b853f4d75e7725e96e6 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smc.h @@ -48,5 +48,6 @@ int fiji_initialize_mc_reg_table(struct pp_hwmgr *hwmgr); bool fiji_is_dpm_running(struct pp_hwmgr *hwmgr); int fiji_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, struct amd_pp_profile *request); +int fiji_thermal_avfs_enable(struct pp_hwmgr *hwmgr); #endif diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index a1cb78552cf69372997cbb1c1cb36e92d825095b..6ae948fc524f72e1f916f4e256fcd992b1d92690 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -161,56 +161,47 @@ static int fiji_start_smu_in_non_protection_mode(struct pp_smumgr *smumgr) static int fiji_setup_pwr_virus(struct pp_smumgr *smumgr) { - int i, result = -1; + int i; + int result = -EINVAL; uint32_t reg, data; - const PWR_Command_Table *virus = PwrVirusTable; - struct fiji_smumgr *priv = (struct fiji_smumgr *)(smumgr->backend); - priv->avfs.AvfsBtcStatus = AVFS_LOAD_VIRUS; - for (i = 0; (i < PWR_VIRUS_TABLE_SIZE); i++) { - switch (virus->command) { + const PWR_Command_Table *pvirus = PwrVirusTable; + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(smumgr->backend); + + for (i = 0; i < PWR_VIRUS_TABLE_SIZE; i++) { + switch (pvirus->command) { case PwrCmdWrite: - reg = virus->reg; - data = virus->data; + reg = pvirus->reg; + data = pvirus->data; cgs_write_register(smumgr->device, reg, data); break; + case PwrCmdEnd: - priv->avfs.AvfsBtcStatus = AVFS_BTC_VIRUS_LOADED; result = 0; break; + default: - pr_err("Table Exit with Invalid Command!"); - priv->avfs.AvfsBtcStatus = AVFS_BTC_VIRUS_FAIL; - result = -1; + pr_info("Table Exit with Invalid Command!"); + smu_data->avfs.avfs_btc_status = AVFS_BTC_VIRUS_FAIL; + result = -EINVAL; break; } - virus++; + pvirus++; } + return result; } static int fiji_start_avfs_btc(struct pp_smumgr *smumgr) { int result = 0; - struct fiji_smumgr *priv = (struct fiji_smumgr *)(smumgr->backend); + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(smumgr->backend); - priv->avfs.AvfsBtcStatus = AVFS_BTC_STARTED; - if (priv->avfs.AvfsBtcParam) { - if (!smum_send_msg_to_smc_with_parameter(smumgr, - PPSMC_MSG_PerformBtc, priv->avfs.AvfsBtcParam)) { - if (!smum_send_msg_to_smc(smumgr, PPSMC_MSG_EnableAvfs)) { - priv->avfs.AvfsBtcStatus = AVFS_BTC_COMPLETED_UNSAVED; - result = 0; - } else { - pr_err("[AVFS][fiji_start_avfs_btc] Attempt" - " to Enable AVFS Failed!"); - smum_send_msg_to_smc(smumgr, PPSMC_MSG_DisableAvfs); - result = -1; - } - } else { - pr_err("[AVFS][fiji_start_avfs_btc] " - "PerformBTC SMU msg failed"); - result = -1; + if (0 != smu_data->avfs.avfs_btc_param) { + if (0 != smu7_send_msg_to_smc_with_parameter(smumgr, + PPSMC_MSG_PerformBtc, smu_data->avfs.avfs_btc_param)) { + pr_info("[AVFS][Fiji_PerformBtc] PerformBTC SMU msg failed"); + result = -EINVAL; } } /* Soft-Reset to reset the engine before loading uCode */ @@ -224,42 +215,6 @@ static int fiji_start_avfs_btc(struct pp_smumgr *smumgr) return result; } -static int fiji_setup_pm_fuse_for_avfs(struct pp_smumgr *smumgr) -{ - int result = 0; - uint32_t table_start; - uint32_t charz_freq_addr, inversion_voltage_addr, charz_freq; - uint16_t inversion_voltage; - - charz_freq = 0x30750000; /* In 10KHz units 0x00007530 Actual value */ - inversion_voltage = 0x1A04; /* mV Q14.2 0x41A Actual value */ - - PP_ASSERT_WITH_CODE(0 == smu7_read_smc_sram_dword(smumgr, - SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU73_Firmware_Header, - PmFuseTable), &table_start, 0x40000), - "[AVFS][Fiji_SetupGfxLvlStruct] SMU could not communicate " - "starting address of PmFuse structure", - return -1;); - - charz_freq_addr = table_start + - offsetof(struct SMU73_Discrete_PmFuses, PsmCharzFreq); - inversion_voltage_addr = table_start + - offsetof(struct SMU73_Discrete_PmFuses, InversionVoltage); - - result = smu7_copy_bytes_to_smc(smumgr, charz_freq_addr, - (uint8_t *)(&charz_freq), sizeof(charz_freq), 0x40000); - PP_ASSERT_WITH_CODE(0 == result, - "[AVFS][fiji_setup_pm_fuse_for_avfs] charz_freq could not " - "be populated.", return -1;); - - result = smu7_copy_bytes_to_smc(smumgr, inversion_voltage_addr, - (uint8_t *)(&inversion_voltage), sizeof(inversion_voltage), 0x40000); - PP_ASSERT_WITH_CODE(0 == result, "[AVFS][fiji_setup_pm_fuse_for_avfs] " - "charz_freq could not be populated.", return -1;); - - return result; -} - static int fiji_setup_graphics_level_structure(struct pp_smumgr *smumgr) { int32_t vr_config; @@ -298,93 +253,41 @@ static int fiji_setup_graphics_level_structure(struct pp_smumgr *smumgr) return 0; } -/* Work in Progress */ -static int fiji_restore_vft_table(struct pp_smumgr *smumgr) -{ - struct fiji_smumgr *priv = (struct fiji_smumgr *)(smumgr->backend); - - if (AVFS_BTC_COMPLETED_SAVED == priv->avfs.AvfsBtcStatus) { - priv->avfs.AvfsBtcStatus = AVFS_BTC_COMPLETED_RESTORED; - return 0; - } else - return -EINVAL; -} - -/* Work in Progress */ -static int fiji_save_vft_table(struct pp_smumgr *smumgr) -{ - struct fiji_smumgr *priv = (struct fiji_smumgr *)(smumgr->backend); - - if (AVFS_BTC_COMPLETED_SAVED == priv->avfs.AvfsBtcStatus) { - priv->avfs.AvfsBtcStatus = AVFS_BTC_COMPLETED_RESTORED; - return 0; - } else - return -EINVAL; -} - static int fiji_avfs_event_mgr(struct pp_smumgr *smumgr, bool smu_started) { - struct fiji_smumgr *priv = (struct fiji_smumgr *)(smumgr->backend); + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(smumgr->backend); - switch (priv->avfs.AvfsBtcStatus) { - case AVFS_BTC_COMPLETED_SAVED: /*S3 State - Pre SMU Start */ - priv->avfs.AvfsBtcStatus = AVFS_BTC_RESTOREVFT_FAILED; - PP_ASSERT_WITH_CODE(0 == fiji_restore_vft_table(smumgr), - "[AVFS][fiji_avfs_event_mgr] Could not Copy Graphics " - "Level table over to SMU", - return -1;); - priv->avfs.AvfsBtcStatus = AVFS_BTC_COMPLETED_RESTORED; - break; - case AVFS_BTC_COMPLETED_RESTORED: /*S3 State - Post SMU Start*/ - priv->avfs.AvfsBtcStatus = AVFS_BTC_SMUMSG_ERROR; - PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(smumgr, - 0x666), - "[AVFS][fiji_avfs_event_mgr] SMU did not respond " - "correctly to VftTableIsValid Msg", - return -1;); - priv->avfs.AvfsBtcStatus = AVFS_BTC_SMUMSG_ERROR; - PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(smumgr, - PPSMC_MSG_EnableAvfs), - "[AVFS][fiji_avfs_event_mgr] SMU did not respond " - "correctly to EnableAvfs Message Msg", - return -1;); - priv->avfs.AvfsBtcStatus = AVFS_BTC_COMPLETED_SAVED; + switch (smu_data->avfs.avfs_btc_status) { + case AVFS_BTC_COMPLETED_PREVIOUSLY: break; + case AVFS_BTC_BOOT: /*Cold Boot State - Post SMU Start*/ if (!smu_started) break; - priv->avfs.AvfsBtcStatus = AVFS_BTC_FAILED; - PP_ASSERT_WITH_CODE(0 == fiji_setup_pm_fuse_for_avfs(smumgr), - "[AVFS][fiji_avfs_event_mgr] Failure at " - "fiji_setup_pm_fuse_for_avfs", - return -1;); - priv->avfs.AvfsBtcStatus = AVFS_BTC_DPMTABLESETUP_FAILED; + smu_data->avfs.avfs_btc_status = AVFS_BTC_FAILED; PP_ASSERT_WITH_CODE(0 == fiji_setup_graphics_level_structure(smumgr), "[AVFS][fiji_avfs_event_mgr] Could not Copy Graphics Level" " table over to SMU", - return -1;); - priv->avfs.AvfsBtcStatus = AVFS_BTC_VIRUS_FAIL; + return -EINVAL;); + smu_data->avfs.avfs_btc_status = AVFS_BTC_VIRUS_FAIL; PP_ASSERT_WITH_CODE(0 == fiji_setup_pwr_virus(smumgr), "[AVFS][fiji_avfs_event_mgr] Could not setup " "Pwr Virus for AVFS ", - return -1;); - priv->avfs.AvfsBtcStatus = AVFS_BTC_FAILED; + return -EINVAL;); + smu_data->avfs.avfs_btc_status = AVFS_BTC_FAILED; PP_ASSERT_WITH_CODE(0 == fiji_start_avfs_btc(smumgr), "[AVFS][fiji_avfs_event_mgr] Failure at " "fiji_start_avfs_btc. AVFS Disabled", - return -1;); - priv->avfs.AvfsBtcStatus = AVFS_BTC_SAVEVFT_FAILED; - PP_ASSERT_WITH_CODE(0 == fiji_save_vft_table(smumgr), - "[AVFS][fiji_avfs_event_mgr] Could not save VFT Table", - return -1;); - priv->avfs.AvfsBtcStatus = AVFS_BTC_COMPLETED_SAVED; + return -EINVAL;); + + smu_data->avfs.avfs_btc_status = AVFS_BTC_ENABLEAVFS; break; case AVFS_BTC_DISABLED: /* Do nothing */ - break; case AVFS_BTC_NOTSUPPORTED: /* Do nothing */ + case AVFS_BTC_ENABLEAVFS: break; default: - pr_err("[AVFS] Something is broken. See log!"); + pr_err("AVFS failed status is %x !\n", smu_data->avfs.avfs_btc_status); break; } return 0; @@ -477,19 +380,6 @@ static int fiji_smu_init(struct pp_smumgr *smumgr) if (smu7_init(smumgr)) return -EINVAL; - fiji_priv->avfs.AvfsBtcStatus = AVFS_BTC_BOOT; - if (fiji_is_hw_avfs_present(smumgr)) - /* AVFS Parameter - * 0 - BTC DC disabled, BTC AC disabled - * 1 - BTC DC enabled, BTC AC disabled - * 2 - BTC DC disabled, BTC AC enabled - * 3 - BTC DC enabled, BTC AC enabled - * Default is 0 - BTC DC disabled, BTC AC disabled - */ - fiji_priv->avfs.AvfsBtcParam = 0; - else - fiji_priv->avfs.AvfsBtcStatus = AVFS_BTC_NOTSUPPORTED; - for (i = 0; i < SMU73_MAX_LEVELS_GRAPHICS; i++) fiji_priv->activity_target[i] = 30; @@ -514,10 +404,12 @@ const struct pp_smumgr_func fiji_smu_funcs = { .init_smc_table = fiji_init_smc_table, .update_sclk_threshold = fiji_update_sclk_threshold, .thermal_setup_fan_table = fiji_thermal_setup_fan_table, + .thermal_avfs_enable = fiji_thermal_avfs_enable, .populate_all_graphic_levels = fiji_populate_all_graphic_levels, .populate_all_memory_levels = fiji_populate_all_memory_levels, .get_mac_definition = fiji_get_mac_definition, .initialize_mc_reg_table = fiji_initialize_mc_reg_table, .is_dpm_running = fiji_is_dpm_running, .populate_requested_graphic_levels = fiji_populate_requested_graphic_levels, + .is_hw_avfs_present = fiji_is_hw_avfs_present, }; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h index adcbdfb209be477dcffd12333ac52b81fffe6a1e..175bf9f8ef9cd9ba25e9ed86db56dc71e490b2f4 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h @@ -28,17 +28,8 @@ #include "smu7_smumgr.h" - -struct fiji_smu_avfs { - enum AVFS_BTC_STATUS AvfsBtcStatus; - uint32_t AvfsBtcParam; -}; - - struct fiji_smumgr { struct smu7_smumgr smu7_data; - - struct fiji_smu_avfs avfs; struct SMU73_Discrete_DpmTable smc_state_table; struct SMU73_Discrete_Ulv ulv_setting; struct SMU73_Discrete_PmFuses power_tune_table; @@ -47,7 +38,5 @@ struct fiji_smumgr { }; - - #endif diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c index f68e759e8be287956609314dfd4134739660ca56..99a00bd39256586b501414fcca02b13d198c8c1e 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c @@ -1498,7 +1498,7 @@ static int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) table_info->vdd_dep_on_sclk; - if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) + if (((struct smu7_smumgr *)smu_data)->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) return result; result = atomctrl_get_avfs_information(hwmgr, &avfs_params); @@ -1889,7 +1889,7 @@ int polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr) { int ret; struct pp_smumgr *smumgr = (struct pp_smumgr *)(hwmgr->smumgr); - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend); + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(smumgr->backend); struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index 9616cedc139cebe35557adb324bb39c8087d5c4d..75f43dadc56ba95838ca0d33ed6a71573d47ca50 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -60,16 +60,14 @@ static const SMU74_Discrete_GraphicsLevel avfs_graphics_level_polaris10[8] = { static const SMU74_Discrete_MemoryLevel avfs_memory_level_polaris10 = { 0x100ea446, 0, 0x30750000, 0x01, 0x01, 0x01, 0x00, 0x00, 0x64, 0x00, 0x00, 0x1f00, 0x00, 0x00}; - static int polaris10_setup_pwr_virus(struct pp_smumgr *smumgr) { int i; - int result = -1; + int result = -EINVAL; uint32_t reg, data; const PWR_Command_Table *pvirus = pwr_virus_table; - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend); - + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(smumgr->backend); for (i = 0; i < PWR_VIRUS_TABLE_SIZE; i++) { switch (pvirus->command) { @@ -86,7 +84,7 @@ static int polaris10_setup_pwr_virus(struct pp_smumgr *smumgr) default: pr_info("Table Exit with Invalid Command!"); smu_data->avfs.avfs_btc_status = AVFS_BTC_VIRUS_FAIL; - result = -1; + result = -EINVAL; break; } pvirus++; @@ -98,7 +96,7 @@ static int polaris10_setup_pwr_virus(struct pp_smumgr *smumgr) static int polaris10_perform_btc(struct pp_smumgr *smumgr) { int result = 0; - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend); + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(smumgr->backend); if (0 != smu_data->avfs.avfs_btc_param) { if (0 != smu7_send_msg_to_smc_with_parameter(smumgr, PPSMC_MSG_PerformBtc, smu_data->avfs.avfs_btc_param)) { @@ -172,10 +170,11 @@ static int polaris10_setup_graphics_level_structure(struct pp_smumgr *smumgr) return 0; } + static int polaris10_avfs_event_mgr(struct pp_smumgr *smumgr, bool SMU_VFT_INTACT) { - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend); + struct smu7_smumgr *smu_data = (struct smu7_smumgr *)(smumgr->backend); switch (smu_data->avfs.avfs_btc_status) { case AVFS_BTC_COMPLETED_PREVIOUSLY: @@ -185,30 +184,31 @@ polaris10_avfs_event_mgr(struct pp_smumgr *smumgr, bool SMU_VFT_INTACT) smu_data->avfs.avfs_btc_status = AVFS_BTC_DPMTABLESETUP_FAILED; PP_ASSERT_WITH_CODE(0 == polaris10_setup_graphics_level_structure(smumgr), - "[AVFS][Polaris10_AVFSEventMgr] Could not Copy Graphics Level table over to SMU", - return -1); + "[AVFS][Polaris10_AVFSEventMgr] Could not Copy Graphics Level table over to SMU", + return -EINVAL); if (smu_data->avfs.avfs_btc_param > 1) { pr_info("[AVFS][Polaris10_AVFSEventMgr] AC BTC has not been successfully verified on Fiji. There may be in this setting."); smu_data->avfs.avfs_btc_status = AVFS_BTC_VIRUS_FAIL; - PP_ASSERT_WITH_CODE(-1 == polaris10_setup_pwr_virus(smumgr), + PP_ASSERT_WITH_CODE(0 == polaris10_setup_pwr_virus(smumgr), "[AVFS][Polaris10_AVFSEventMgr] Could not setup Pwr Virus for AVFS ", - return -1); + return -EINVAL); } smu_data->avfs.avfs_btc_status = AVFS_BTC_FAILED; PP_ASSERT_WITH_CODE(0 == polaris10_perform_btc(smumgr), "[AVFS][Polaris10_AVFSEventMgr] Failure at SmuPolaris10_PerformBTC. AVFS Disabled", - return -1); - + return -EINVAL); + smu_data->avfs.avfs_btc_status = AVFS_BTC_ENABLEAVFS; break; case AVFS_BTC_DISABLED: + case AVFS_BTC_ENABLEAVFS: case AVFS_BTC_NOTSUPPORTED: break; default: - pr_info("[AVFS] Something is broken. See log!"); + pr_err("AVFS failed status is %x!\n", smu_data->avfs.avfs_btc_status); break; } @@ -376,11 +376,6 @@ static int polaris10_smu_init(struct pp_smumgr *smumgr) if (smu7_init(smumgr)) return -EINVAL; - if (polaris10_is_hw_avfs_present(smumgr)) - smu_data->avfs.avfs_btc_status = AVFS_BTC_BOOT; - else - smu_data->avfs.avfs_btc_status = AVFS_BTC_NOTSUPPORTED; - for (i = 0; i < SMU74_MAX_LEVELS_GRAPHICS; i++) smu_data->activity_target[i] = PPPOLARIS10_TARGETACTIVITY_DFLT; @@ -410,4 +405,5 @@ const struct pp_smumgr_func polaris10_smu_funcs = { .get_mac_definition = polaris10_get_mac_definition, .is_dpm_running = polaris10_is_dpm_running, .populate_requested_graphic_levels = polaris10_populate_requested_graphic_levels, + .is_hw_avfs_present = polaris10_is_hw_avfs_present, }; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.h index 49ebf1d5a53c9a0194eead44ccfd605414a385a8..5e19c24b0561fd0d3ba082bf5f05a4820a4d4926 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.h @@ -32,11 +32,6 @@ #define SMC_RAM_END 0x40000 -struct polaris10_avfs { - enum AVFS_BTC_STATUS avfs_btc_status; - uint32_t avfs_btc_param; -}; - struct polaris10_pt_defaults { uint8_t SviLoadLineEn; uint8_t SviLoadLineVddC; @@ -51,8 +46,6 @@ struct polaris10_pt_defaults { uint16_t BAPMTI_RC[SMU74_DTE_ITERATIONS * SMU74_DTE_SOURCES * SMU74_DTE_SINKS]; }; - - struct polaris10_range_table { uint32_t trans_lower_frequency; /* in 10khz */ uint32_t trans_upper_frequency; @@ -61,14 +54,13 @@ struct polaris10_range_table { struct polaris10_smumgr { struct smu7_smumgr smu7_data; uint8_t protected_mode; - struct polaris10_avfs avfs; SMU74_Discrete_DpmTable smc_state_table; struct SMU74_Discrete_Ulv ulv_setting; struct SMU74_Discrete_PmFuses power_tune_table; struct polaris10_range_table range_table[NUM_SCLK_RANGE]; const struct polaris10_pt_defaults *power_tune_defaults; - uint32_t activity_target[SMU74_MAX_LEVELS_GRAPHICS]; - uint32_t bif_sclk_table[SMU74_MAX_LEVELS_LINK]; + uint32_t activity_target[SMU74_MAX_LEVELS_GRAPHICS]; + uint32_t bif_sclk_table[SMU74_MAX_LEVELS_LINK]; }; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c index 35ac276814150a4fcf91e798a4de7283acf01855..76347ff6d6554b447e8e066c7ac81ef85562d4bc 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c @@ -540,7 +540,6 @@ int smu7_upload_smu_firmware_image(struct pp_smumgr *smumgr) return result; } - int smu7_init(struct pp_smumgr *smumgr) { struct smu7_smumgr *smu_data; @@ -596,6 +595,11 @@ int smu7_init(struct pp_smumgr *smumgr) (cgs_handle_t)smu_data->smu_buffer.handle); return -EINVAL); + if (smum_is_hw_avfs_present(smumgr)) + smu_data->avfs.avfs_btc_status = AVFS_BTC_BOOT; + else + smu_data->avfs.avfs_btc_status = AVFS_BTC_NOTSUPPORTED; + return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h index 919be435b49c890368ca0f3dc6906fb3f90ddf2f..ee5e32d2921ed8576a78f6ba6b389461503d9ee2 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h @@ -37,6 +37,11 @@ struct smu7_buffer_entry { unsigned long handle; }; +struct smu7_avfs { + enum AVFS_BTC_STATUS avfs_btc_status; + uint32_t avfs_btc_param; +}; + struct smu7_smumgr { uint8_t *header; uint8_t *mec_image; @@ -50,7 +55,8 @@ struct smu7_smumgr { uint32_t arb_table_start; uint32_t ulv_setting_starts; uint8_t security_hard_key; - uint32_t acpi_optimization; + uint32_t acpi_optimization; + struct smu7_avfs avfs; }; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c index bcc61ffd13cb1913a61e8ce82a5855358181525d..3bdf6478de7fac4ef77f88030458f38ffb5fe269 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c @@ -43,7 +43,8 @@ MODULE_FIRMWARE("amdgpu/polaris11_smc.bin"); MODULE_FIRMWARE("amdgpu/polaris11_smc_sk.bin"); MODULE_FIRMWARE("amdgpu/polaris11_k_smc.bin"); MODULE_FIRMWARE("amdgpu/polaris12_smc.bin"); - +MODULE_FIRMWARE("amdgpu/vega10_smc.bin"); +MODULE_FIRMWARE("amdgpu/vega10_acg_smc.bin"); int smum_early_init(struct pp_instance *handle) { @@ -403,3 +404,11 @@ int smum_populate_requested_graphic_levels(struct pp_hwmgr *hwmgr, return 0; } + +bool smum_is_hw_avfs_present(struct pp_smumgr *smumgr) +{ + if (smumgr->smumgr_funcs->is_hw_avfs_present) + return smumgr->smumgr_funcs->is_hw_avfs_present(smumgr); + + return false; +} diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c index 269678443862d8e446a6256b6bb91c3d8789dc8d..408514c965a015cd1ea025957eb21370d5209960 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c @@ -356,6 +356,9 @@ int vega10_set_tools_address(struct pp_smumgr *smumgr) static int vega10_verify_smc_interface(struct pp_smumgr *smumgr) { uint32_t smc_driver_if_version; + struct cgs_system_info sys_info = {0}; + uint32_t dev_id; + uint32_t rev_id; PP_ASSERT_WITH_CODE(!vega10_send_msg_to_smc(smumgr, PPSMC_MSG_GetDriverIfVersion), @@ -363,12 +366,27 @@ static int vega10_verify_smc_interface(struct pp_smumgr *smumgr) return -EINVAL); vega10_read_arg_from_smc(smumgr, &smc_driver_if_version); - if (smc_driver_if_version != SMU9_DRIVER_IF_VERSION) { - pr_err("Your firmware(0x%x) doesn't match \ - SMU9_DRIVER_IF_VERSION(0x%x). \ - Please update your firmware!\n", - smc_driver_if_version, SMU9_DRIVER_IF_VERSION); - return -EINVAL; + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_PCIE_DEV; + cgs_query_system_info(smumgr->device, &sys_info); + dev_id = (uint32_t)sys_info.value; + + sys_info.size = sizeof(struct cgs_system_info); + sys_info.info_id = CGS_SYSTEM_INFO_PCIE_REV; + cgs_query_system_info(smumgr->device, &sys_info); + rev_id = (uint32_t)sys_info.value; + + if (!((dev_id == 0x687f) && + ((rev_id == 0xc0) || + (rev_id == 0xc1) || + (rev_id == 0xc3)))) { + if (smc_driver_if_version != SMU9_DRIVER_IF_VERSION) { + pr_err("Your firmware(0x%x) doesn't match \ + SMU9_DRIVER_IF_VERSION(0x%x). \ + Please update your firmware!\n", + smc_driver_if_version, SMU9_DRIVER_IF_VERSION); + return -EINVAL; + } } return 0; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h index dbd4fd3a810b71223f1c485dc051515d5da59654..8bd38102b58e25c970a28d2844d0e345c1a06292 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h @@ -16,16 +16,16 @@ TRACE_EVENT(amd_sched_job, TP_ARGS(sched_job), TP_STRUCT__entry( __field(struct amd_sched_entity *, entity) - __field(struct amd_sched_job *, sched_job) __field(struct dma_fence *, fence) __field(const char *, name) + __field(uint64_t, id) __field(u32, job_count) __field(int, hw_job_count) ), TP_fast_assign( __entry->entity = sched_job->s_entity; - __entry->sched_job = sched_job; + __entry->id = sched_job->id; __entry->fence = &sched_job->s_fence->finished; __entry->name = sched_job->sched->name; __entry->job_count = kfifo_len( @@ -33,8 +33,9 @@ TRACE_EVENT(amd_sched_job, __entry->hw_job_count = atomic_read( &sched_job->sched->hw_rq_count); ), - TP_printk("entity=%p, sched job=%p, fence=%p, ring=%s, job count:%u, hw job count:%d", - __entry->entity, __entry->sched_job, __entry->fence, __entry->name, + TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", + __entry->entity, __entry->id, + __entry->fence, __entry->name, __entry->job_count, __entry->hw_job_count) ); diff --git a/drivers/gpu/drm/arc/arcpgu_crtc.c b/drivers/gpu/drm/arc/arcpgu_crtc.c index ad9a95916f1f8c06ced45d4a45634042cd697316..16903dc7fe0dc465d7d6f93f138efeb337ca4c42 100644 --- a/drivers/gpu/drm/arc/arcpgu_crtc.c +++ b/drivers/gpu/drm/arc/arcpgu_crtc.c @@ -64,6 +64,20 @@ static const struct drm_crtc_funcs arc_pgu_crtc_funcs = { .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, }; +static enum drm_mode_status arc_pgu_crtc_mode_valid(struct drm_crtc *crtc, + const struct drm_display_mode *mode) +{ + struct arcpgu_drm_private *arcpgu = crtc_to_arcpgu_priv(crtc); + long rate, clk_rate = mode->clock * 1000; + long diff = clk_rate / 200; /* +-0.5% allowed by HDMI spec */ + + rate = clk_round_rate(arcpgu->clk, clk_rate); + if ((max(rate, clk_rate) - min(rate, clk_rate) < diff) && (rate > 0)) + return MODE_OK; + + return MODE_NOCLOCK; +} + static void arc_pgu_crtc_mode_set_nofb(struct drm_crtc *crtc) { struct arcpgu_drm_private *arcpgu = crtc_to_arcpgu_priv(crtc); @@ -106,7 +120,8 @@ static void arc_pgu_crtc_mode_set_nofb(struct drm_crtc *crtc) clk_set_rate(arcpgu->clk, m->crtc_clock * 1000); } -static void arc_pgu_crtc_enable(struct drm_crtc *crtc) +static void arc_pgu_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) { struct arcpgu_drm_private *arcpgu = crtc_to_arcpgu_priv(crtc); @@ -116,7 +131,8 @@ static void arc_pgu_crtc_enable(struct drm_crtc *crtc) ARCPGU_CTRL_ENABLE_MASK); } -static void arc_pgu_crtc_disable(struct drm_crtc *crtc) +static void arc_pgu_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) { struct arcpgu_drm_private *arcpgu = crtc_to_arcpgu_priv(crtc); @@ -129,20 +145,6 @@ static void arc_pgu_crtc_disable(struct drm_crtc *crtc) ~ARCPGU_CTRL_ENABLE_MASK); } -static int arc_pgu_crtc_atomic_check(struct drm_crtc *crtc, - struct drm_crtc_state *state) -{ - struct arcpgu_drm_private *arcpgu = crtc_to_arcpgu_priv(crtc); - struct drm_display_mode *mode = &state->adjusted_mode; - long rate, clk_rate = mode->clock * 1000; - - rate = clk_round_rate(arcpgu->clk, clk_rate); - if (rate != clk_rate) - return -EINVAL; - - return 0; -} - static void arc_pgu_crtc_atomic_begin(struct drm_crtc *crtc, struct drm_crtc_state *state) { @@ -158,15 +160,13 @@ static void arc_pgu_crtc_atomic_begin(struct drm_crtc *crtc, } static const struct drm_crtc_helper_funcs arc_pgu_crtc_helper_funcs = { + .mode_valid = arc_pgu_crtc_mode_valid, .mode_set = drm_helper_crtc_mode_set, .mode_set_base = drm_helper_crtc_mode_set_base, .mode_set_nofb = arc_pgu_crtc_mode_set_nofb, - .enable = arc_pgu_crtc_enable, - .disable = arc_pgu_crtc_disable, - .prepare = arc_pgu_crtc_disable, - .commit = arc_pgu_crtc_enable, - .atomic_check = arc_pgu_crtc_atomic_check, .atomic_begin = arc_pgu_crtc_atomic_begin, + .atomic_enable = arc_pgu_crtc_atomic_enable, + .atomic_disable = arc_pgu_crtc_atomic_disable, }; static void arc_pgu_plane_atomic_update(struct drm_plane *plane, @@ -218,6 +218,7 @@ static struct drm_plane *arc_pgu_plane_init(struct drm_device *drm) ret = drm_universal_plane_init(drm, plane, 0xff, &arc_pgu_plane_funcs, formats, ARRAY_SIZE(formats), + NULL, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/arc/arcpgu_drv.c b/drivers/gpu/drm/arc/arcpgu_drv.c index 3e43a5d4fb09e0c9233f02b6bd1545e5abc190cc..289eda54e5aa83b23216af852445f44f70893b80 100644 --- a/drivers/gpu/drm/arc/arcpgu_drv.c +++ b/drivers/gpu/drm/arc/arcpgu_drv.c @@ -31,7 +31,7 @@ static void arcpgu_fb_output_poll_changed(struct drm_device *dev) drm_fbdev_cma_hotplug_event(arcpgu->fbdev); } -static struct drm_mode_config_funcs arcpgu_drm_modecfg_funcs = { +static const struct drm_mode_config_funcs arcpgu_drm_modecfg_funcs = { .fb_create = drm_fb_cma_create, .output_poll_changed = arcpgu_fb_output_poll_changed, .atomic_check = drm_atomic_helper_check, @@ -48,29 +48,7 @@ static void arcpgu_setup_mode_config(struct drm_device *drm) drm->mode_config.funcs = &arcpgu_drm_modecfg_funcs; } -static int arcpgu_gem_mmap(struct file *filp, struct vm_area_struct *vma) -{ - int ret; - - ret = drm_gem_mmap(filp, vma); - if (ret) - return ret; - - vma->vm_page_prot = pgprot_noncached(vm_get_page_prot(vma->vm_flags)); - return 0; -} - -static const struct file_operations arcpgu_drm_ops = { - .owner = THIS_MODULE, - .open = drm_open, - .release = drm_release, - .unlocked_ioctl = drm_ioctl, - .compat_ioctl = drm_compat_ioctl, - .poll = drm_poll, - .read = drm_read, - .llseek = no_llseek, - .mmap = arcpgu_gem_mmap, -}; +DEFINE_DRM_GEM_CMA_FOPS(arcpgu_drm_ops); static void arcpgu_lastclose(struct drm_device *drm) { @@ -142,7 +120,7 @@ static int arcpgu_load(struct drm_device *drm) return -ENODEV; } - platform_set_drvdata(pdev, arcpgu); + platform_set_drvdata(pdev, drm); return 0; } @@ -160,11 +138,37 @@ static int arcpgu_unload(struct drm_device *drm) return 0; } +#ifdef CONFIG_DEBUG_FS +static int arcpgu_show_pxlclock(struct seq_file *m, void *arg) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *drm = node->minor->dev; + struct arcpgu_drm_private *arcpgu = drm->dev_private; + unsigned long clkrate = clk_get_rate(arcpgu->clk); + unsigned long mode_clock = arcpgu->crtc.mode.crtc_clock * 1000; + + seq_printf(m, "hw : %lu\n", clkrate); + seq_printf(m, "mode: %lu\n", mode_clock); + return 0; +} + +static struct drm_info_list arcpgu_debugfs_list[] = { + { "clocks", arcpgu_show_pxlclock, 0 }, + { "fb", drm_fb_cma_debugfs_show, 0 }, +}; + +static int arcpgu_debugfs_init(struct drm_minor *minor) +{ + return drm_debugfs_create_files(arcpgu_debugfs_list, + ARRAY_SIZE(arcpgu_debugfs_list), minor->debugfs_root, minor); +} +#endif + static struct drm_driver arcpgu_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_ATOMIC, .lastclose = arcpgu_lastclose, - .name = "drm-arcpgu", + .name = "arcpgu", .desc = "ARC PGU Controller", .date = "20160219", .major = 1, @@ -172,8 +176,6 @@ static struct drm_driver arcpgu_drm_driver = { .patchlevel = 0, .fops = &arcpgu_drm_ops, .dumb_create = drm_gem_cma_dumb_create, - .dumb_map_offset = drm_gem_cma_dumb_map_offset, - .dumb_destroy = drm_gem_dumb_destroy, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_free_object_unlocked = drm_gem_cma_free_object, @@ -185,6 +187,9 @@ static struct drm_driver arcpgu_drm_driver = { .gem_prime_vmap = drm_gem_cma_prime_vmap, .gem_prime_vunmap = drm_gem_cma_prime_vunmap, .gem_prime_mmap = drm_gem_cma_prime_mmap, +#ifdef CONFIG_DEBUG_FS + .debugfs_init = arcpgu_debugfs_init, +#endif }; static int arcpgu_probe(struct platform_device *pdev) diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c index d67b6f15e8b8a03f902d02861e51cb696eb1e9b5..72b22b805412b2cb19d8d90e85904be737ecf335 100644 --- a/drivers/gpu/drm/arm/hdlcd_crtc.c +++ b/drivers/gpu/drm/arm/hdlcd_crtc.c @@ -165,7 +165,8 @@ static void hdlcd_crtc_mode_set_nofb(struct drm_crtc *crtc) clk_set_rate(hdlcd->clk, m->crtc_clock * 1000); } -static void hdlcd_crtc_enable(struct drm_crtc *crtc) +static void hdlcd_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) { struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc); @@ -175,7 +176,8 @@ static void hdlcd_crtc_enable(struct drm_crtc *crtc) drm_crtc_vblank_on(crtc); } -static void hdlcd_crtc_disable(struct drm_crtc *crtc) +static void hdlcd_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) { struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc); @@ -218,10 +220,10 @@ static void hdlcd_crtc_atomic_begin(struct drm_crtc *crtc, } static const struct drm_crtc_helper_funcs hdlcd_crtc_helper_funcs = { - .enable = hdlcd_crtc_enable, - .disable = hdlcd_crtc_disable, .atomic_check = hdlcd_crtc_atomic_check, .atomic_begin = hdlcd_crtc_atomic_begin, + .atomic_enable = hdlcd_crtc_atomic_enable, + .atomic_disable = hdlcd_crtc_atomic_disable, }; static int hdlcd_plane_atomic_check(struct drm_plane *plane, @@ -313,6 +315,7 @@ static struct drm_plane *hdlcd_plane_init(struct drm_device *drm) ret = drm_universal_plane_init(drm, plane, 0xff, &hdlcd_plane_funcs, formats, ARRAY_SIZE(formats), + NULL, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { return ERR_PTR(ret); diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index d3da87fbd85a57ff199881fcb12b8afd93434025..f9bda7b0d2ec275996979e70822fc56b5279c53a 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -253,8 +253,6 @@ static struct drm_driver hdlcd_driver = { .gem_free_object_unlocked = drm_gem_cma_free_object, .gem_vm_ops = &drm_gem_cma_vm_ops, .dumb_create = drm_gem_cma_dumb_create, - .dumb_map_offset = drm_gem_cma_dumb_map_offset, - .dumb_destroy = drm_gem_dumb_destroy, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = drm_gem_prime_export, @@ -343,7 +341,6 @@ static int hdlcd_drm_bind(struct device *dev) } err_fbdev: drm_kms_helper_poll_fini(drm); - drm_vblank_cleanup(drm); err_vblank: pm_runtime_disable(drm->dev); err_pm_active: @@ -375,7 +372,6 @@ static void hdlcd_drm_unbind(struct device *dev) component_unbind_all(dev, drm); of_node_put(hdlcd->crtc.port); hdlcd->crtc.port = NULL; - drm_vblank_cleanup(drm); pm_runtime_get_sync(drm->dev); drm_irq_uninstall(drm); pm_runtime_put_sync(drm->dev); diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 4bb38a21efecb6e0741a626f986515570ed3b1f3..3615d18a7ddf3a5cc49f7d09d68eec35fccc6e3c 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -46,7 +46,8 @@ static enum drm_mode_status malidp_crtc_mode_valid(struct drm_crtc *crtc, return MODE_OK; } -static void malidp_crtc_enable(struct drm_crtc *crtc) +static void malidp_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) { struct malidp_drm *malidp = crtc_to_malidp_device(crtc); struct malidp_hw_device *hwdev = malidp->dev; @@ -69,7 +70,8 @@ static void malidp_crtc_enable(struct drm_crtc *crtc) drm_crtc_vblank_on(crtc); } -static void malidp_crtc_disable(struct drm_crtc *crtc) +static void malidp_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) { struct malidp_drm *malidp = crtc_to_malidp_device(crtc); struct malidp_hw_device *hwdev = malidp->dev; @@ -408,9 +410,9 @@ static int malidp_crtc_atomic_check(struct drm_crtc *crtc, static const struct drm_crtc_helper_funcs malidp_crtc_helper_funcs = { .mode_valid = malidp_crtc_mode_valid, - .enable = malidp_crtc_enable, - .disable = malidp_crtc_disable, .atomic_check = malidp_crtc_atomic_check, + .atomic_enable = malidp_crtc_atomic_enable, + .atomic_disable = malidp_crtc_atomic_disable, }; static struct drm_crtc_state *malidp_crtc_duplicate_state(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index 01b13d2199179e6ec83c5e91796f42fe905c713d..1a57cc28955e7fe7cbc595c718ba62570f476c0c 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -225,7 +225,7 @@ static void malidp_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_commit_modeset_disables(drm, state); - for_each_crtc_in_state(state, crtc, old_crtc_state, i) { + for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) { malidp_atomic_commit_update_gamma(crtc, old_crtc_state); malidp_atomic_commit_update_coloradj(crtc, old_crtc_state); malidp_atomic_commit_se_config(crtc, old_crtc_state); @@ -331,8 +331,6 @@ static struct drm_driver malidp_driver = { .gem_free_object_unlocked = drm_gem_cma_free_object, .gem_vm_ops = &drm_gem_cma_vm_ops, .dumb_create = drm_gem_cma_dumb_create, - .dumb_map_offset = drm_gem_cma_dumb_map_offset, - .dumb_destroy = drm_gem_dumb_destroy, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = drm_gem_prime_export, diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 600fa7bd7f5201b451069dee8f21e9871cae70e3..94e7e3fa3408cf163fda7f81b40e76ca73ac4c96 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -128,7 +128,6 @@ static void malidp_plane_atomic_print_state(struct drm_printer *p, static const struct drm_plane_funcs malidp_de_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, - .set_property = drm_atomic_helper_plane_set_property, .destroy = malidp_de_plane_destroy, .reset = malidp_plane_reset, .atomic_duplicate_state = malidp_duplicate_plane_state, @@ -398,7 +397,7 @@ int malidp_de_planes_init(struct drm_device *drm) DRM_PLANE_TYPE_OVERLAY; ret = drm_universal_plane_init(drm, &plane->base, crtcs, &malidp_de_plane_funcs, formats, - n, plane_type, NULL); + n, NULL, plane_type, NULL); if (ret < 0) goto cleanup; diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 4fe19fde84f9624dbae3bdbc062e282401f617c7..2a4d163ac76f7be9e0291d5d1d51a31798688e40 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -334,16 +334,6 @@ static void armada_drm_vblank_off(struct armada_crtc *dcrtc) armada_drm_plane_work_run(dcrtc, dcrtc->crtc.primary); } -void armada_drm_crtc_gamma_set(struct drm_crtc *crtc, u16 r, u16 g, u16 b, - int idx) -{ -} - -void armada_drm_crtc_gamma_get(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, - int idx) -{ -} - /* The mode_config.mutex will be held for this call */ static void armada_drm_crtc_dpms(struct drm_crtc *crtc, int dpms) { @@ -1150,13 +1140,13 @@ int armada_drm_plane_init(struct armada_plane *plane) return 0; } -static struct drm_prop_enum_list armada_drm_csc_yuv_enum_list[] = { +static const struct drm_prop_enum_list armada_drm_csc_yuv_enum_list[] = { { CSC_AUTO, "Auto" }, { CSC_YUV_CCIR601, "CCIR601" }, { CSC_YUV_CCIR709, "CCIR709" }, }; -static struct drm_prop_enum_list armada_drm_csc_rgb_enum_list[] = { +static const struct drm_prop_enum_list armada_drm_csc_rgb_enum_list[] = { { CSC_AUTO, "Auto" }, { CSC_RGB_COMPUTER, "Computer system" }, { CSC_RGB_STUDIO, "Studio" }, @@ -1269,6 +1259,7 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, &armada_primary_plane_funcs, armada_primary_formats, ARRAY_SIZE(armada_primary_formats), + NULL, DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { kfree(primary); @@ -1329,8 +1320,7 @@ armada_lcd_bind(struct device *dev, struct device *master, void *data) port = of_get_child_by_name(parent, "port"); of_node_put(np); if (!port) { - dev_err(dev, "no port node found in %s\n", - parent->full_name); + dev_err(dev, "no port node found in %pOF\n", parent); return -ENXIO; } @@ -1364,7 +1354,7 @@ static int armada_lcd_remove(struct platform_device *pdev) return 0; } -static struct of_device_id armada_lcd_of_match[] = { +static const struct of_device_id armada_lcd_of_match[] = { { .compatible = "marvell,dove-lcd", .data = &armada510_ops, diff --git a/drivers/gpu/drm/armada/armada_crtc.h b/drivers/gpu/drm/armada/armada_crtc.h index 7e8906d3ae2677bd8f644f1f0e3be8b1815719f0..bab11f48357591325d5bab2f80de34981f4d5482 100644 --- a/drivers/gpu/drm/armada/armada_crtc.h +++ b/drivers/gpu/drm/armada/armada_crtc.h @@ -102,8 +102,6 @@ struct armada_crtc { }; #define drm_to_armada_crtc(c) container_of(c, struct armada_crtc, crtc) -void armada_drm_crtc_gamma_set(struct drm_crtc *, u16, u16, u16, int); -void armada_drm_crtc_gamma_get(struct drm_crtc *, u16 *, u16 *, u16 *, int); void armada_drm_crtc_update_regs(struct armada_crtc *, struct armada_regs *); void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc, diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index e618fab7f519d6b8602f65c1bb09de3f1186d814..0b3227c039d768ada13307e99892ff1b9f78ff55 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -232,8 +232,8 @@ static void armada_add_endpoints(struct device *dev, of_node_put(remote); continue; } else if (!of_device_is_available(remote->parent)) { - dev_warn(dev, "parent device of %s is not available\n", - remote->full_name); + dev_warn(dev, "parent device of %pOF is not available\n", + remote); of_node_put(remote); continue; } diff --git a/drivers/gpu/drm/armada/armada_fbdev.c b/drivers/gpu/drm/armada/armada_fbdev.c index 602dfead8eefa6b6bc70e3290fa9671efc3eada1..29c7d047b1525d3323d87d28d7615560fc485df1 100644 --- a/drivers/gpu/drm/armada/armada_fbdev.c +++ b/drivers/gpu/drm/armada/armada_fbdev.c @@ -81,7 +81,6 @@ static int armada_fb_create(struct drm_fb_helper *fbh, strlcpy(info->fix.id, "armada-drmfb", sizeof(info->fix.id)); info->par = fbh; - info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT; info->fbops = &armada_fb_ops; info->fix.smem_start = obj->phys_addr; info->fix.smem_len = obj->obj.size; @@ -118,8 +117,6 @@ static int armada_fb_probe(struct drm_fb_helper *fbh, } static const struct drm_fb_helper_funcs armada_fb_helper_funcs = { - .gamma_set = armada_drm_crtc_gamma_set, - .gamma_get = armada_drm_crtc_gamma_get, .fb_probe = armada_fb_probe, }; diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index e9a29df4b4438382cd9d6de5c814738649f1c6fb..edc44910d79fc7c65bce893d31b2caf9ce1d34e4 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -388,7 +388,7 @@ static const uint32_t armada_ovl_formats[] = { DRM_FORMAT_BGR565, }; -static struct drm_prop_enum_list armada_drm_colorkey_enum_list[] = { +static const struct drm_prop_enum_list armada_drm_colorkey_enum_list[] = { { CKMODE_DISABLE, "disabled" }, { CKMODE_Y, "Y component" }, { CKMODE_U, "U component" }, @@ -460,6 +460,7 @@ int armada_overlay_plane_create(struct drm_device *dev, unsigned long crtcs) &armada_ovl_plane_funcs, armada_ovl_formats, ARRAY_SIZE(armada_ovl_formats), + NULL, DRM_PLANE_TYPE_OVERLAY, NULL); if (ret) { kfree(dplane); diff --git a/drivers/gpu/drm/ast/ast_dp501.c b/drivers/gpu/drm/ast/ast_dp501.c index 76f07f38b941f5e8ea3ad9255e8870b293bfab84..749646ae365fc36e7ab64df83228a9f04a472fc7 100644 --- a/drivers/gpu/drm/ast/ast_dp501.c +++ b/drivers/gpu/drm/ast/ast_dp501.c @@ -4,16 +4,11 @@ #include "ast_drv.h" MODULE_FIRMWARE("ast_dp501_fw.bin"); -int ast_load_dp501_microcode(struct drm_device *dev) +static int ast_load_dp501_microcode(struct drm_device *dev) { struct ast_private *ast = dev->dev_private; - static char *fw_name = "ast_dp501_fw.bin"; - int err; - err = request_firmware(&ast->dp501_fw, fw_name, dev->dev); - if (err) - return err; - return 0; + return request_firmware(&ast->dp501_fw, "ast_dp501_fw.bin", dev->dev); } static void send_ack(struct ast_private *ast) @@ -187,7 +182,7 @@ bool ast_backup_fw(struct drm_device *dev, u8 *addr, u32 size) return false; } -bool ast_launch_m68k(struct drm_device *dev) +static bool ast_launch_m68k(struct drm_device *dev) { struct ast_private *ast = dev->dev_private; u32 i, data, len = 0; @@ -201,7 +196,11 @@ bool ast_launch_m68k(struct drm_device *dev) if (ast->dp501_fw_addr) { fw_addr = ast->dp501_fw_addr; len = 32*1024; - } else if (ast->dp501_fw) { + } else { + if (!ast->dp501_fw && + ast_load_dp501_microcode(dev) < 0) + return false; + fw_addr = (u8 *)ast->dp501_fw->data; len = ast->dp501_fw->size; } @@ -432,3 +431,11 @@ void ast_init_3rdtx(struct drm_device *dev) } } } + +void ast_release_firmware(struct drm_device *dev) +{ + struct ast_private *ast = dev->dev_private; + + release_firmware(ast->dp501_fw); + ast->dp501_fw = NULL; +} diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index fd7c9eec92e4d75eba8aeb935d6dbcd2e122c1f7..69dab82a37714853b5dfdb74dbe479f7d6c10fb1 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -197,7 +197,6 @@ static struct drm_driver driver = { .load = ast_driver_load, .unload = ast_driver_unload, - .set_busid = drm_pci_set_busid, .fops = &ast_fops, .name = DRIVER_NAME, @@ -210,7 +209,6 @@ static struct drm_driver driver = { .gem_free_object_unlocked = ast_gem_free_object, .dumb_create = ast_dumb_create, .dumb_map_offset = ast_dumb_mmap_offset, - .dumb_destroy = drm_gem_dumb_destroy, }; @@ -221,11 +219,11 @@ static int __init ast_init(void) if (ast_modeset == 0) return -EINVAL; - return drm_pci_init(&driver, &ast_pci_driver); + return pci_register_driver(&ast_pci_driver); } static void __exit ast_exit(void) { - drm_pci_exit(&driver, &ast_pci_driver); + pci_unregister_driver(&ast_pci_driver); } module_init(ast_init); diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 8880f0b62e9c2832845d7df2f40e0e77817a9f91..e6c4cd3dc50ec7540d65efd61fea71ddb249ecdd 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -245,7 +245,6 @@ struct ast_connector { struct ast_crtc { struct drm_crtc base; - u8 lut_r[256], lut_g[256], lut_b[256]; struct drm_gem_object *cursor_bo; uint64_t cursor_addr; int cursor_width, cursor_height; @@ -401,11 +400,10 @@ void ast_post_gpu(struct drm_device *dev); u32 ast_mindwm(struct ast_private *ast, u32 r); void ast_moutdwm(struct ast_private *ast, u32 r, u32 v); /* ast dp501 */ -int ast_load_dp501_microcode(struct drm_device *dev); void ast_set_dp501_video_output(struct drm_device *dev, u8 mode); -bool ast_launch_m68k(struct drm_device *dev); bool ast_backup_fw(struct drm_device *dev, u8 *addr, u32 size); bool ast_dp501_read_edid(struct drm_device *dev, u8 *ediddata); u8 ast_get_dp501_max_clk(struct drm_device *dev); void ast_init_3rdtx(struct drm_device *dev); +void ast_release_firmware(struct drm_device *dev); #endif diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c index 4ad4acd0ccab8528d52be211f321f7f589443dc1..0cd827e11fa20d8af7f038cecb8ea12465014523 100644 --- a/drivers/gpu/drm/ast/ast_fb.c +++ b/drivers/gpu/drm/ast/ast_fb.c @@ -231,7 +231,6 @@ static int astfb_create(struct drm_fb_helper *helper, strcpy(info->fix.id, "astdrmfb"); - info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT; info->fbops = &astfb_ops; info->apertures->ranges[0].base = pci_resource_start(dev->pdev, 0); @@ -255,27 +254,7 @@ static int astfb_create(struct drm_fb_helper *helper, return ret; } -static void ast_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green, - u16 blue, int regno) -{ - struct ast_crtc *ast_crtc = to_ast_crtc(crtc); - ast_crtc->lut_r[regno] = red >> 8; - ast_crtc->lut_g[regno] = green >> 8; - ast_crtc->lut_b[regno] = blue >> 8; -} - -static void ast_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green, - u16 *blue, int regno) -{ - struct ast_crtc *ast_crtc = to_ast_crtc(crtc); - *red = ast_crtc->lut_r[regno] << 8; - *green = ast_crtc->lut_g[regno] << 8; - *blue = ast_crtc->lut_b[regno] << 8; -} - static const struct drm_fb_helper_funcs ast_fb_helper_funcs = { - .gamma_set = ast_fb_gamma_set, - .gamma_get = ast_fb_gamma_get, .fb_probe = astfb_create, }; @@ -287,7 +266,7 @@ static void ast_fbdev_destroy(struct drm_device *dev, drm_fb_helper_unregister_fbi(&afbdev->helper); if (afb->obj) { - drm_gem_object_unreference_unlocked(afb->obj); + drm_gem_object_put_unlocked(afb->obj); afb->obj = NULL; } drm_fb_helper_fini(&afbdev->helper); diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 262c2c0e43b4b162fae7530f88b143814abe8949..dac355812adcbdcea7d68a40ff44bca90b0e0dcf 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -387,9 +387,9 @@ static void ast_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct ast_framebuffer *ast_fb = to_ast_framebuffer(fb); - drm_gem_object_unreference_unlocked(ast_fb->obj); + drm_gem_object_put_unlocked(ast_fb->obj); drm_framebuffer_cleanup(fb); - kfree(fb); + kfree(ast_fb); } static const struct drm_framebuffer_funcs ast_fb_funcs = { @@ -429,13 +429,13 @@ ast_user_framebuffer_create(struct drm_device *dev, ast_fb = kzalloc(sizeof(*ast_fb), GFP_KERNEL); if (!ast_fb) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ERR_PTR(-ENOMEM); } ret = ast_framebuffer_init(dev, ast_fb, mode_cmd, obj); if (ret) { - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); kfree(ast_fb); return ERR_PTR(ret); } @@ -576,6 +576,7 @@ void ast_driver_unload(struct drm_device *dev) { struct ast_private *ast = dev->dev_private; + ast_release_firmware(dev); kfree(ast->dp501_fw_addr); ast_mode_fini(dev); ast_fbdev_fini(dev); @@ -627,7 +628,7 @@ int ast_dumb_create(struct drm_file *file, return ret; ret = drm_gem_handle_create(file, gobj, &handle); - drm_gem_object_unreference_unlocked(gobj); + drm_gem_object_put_unlocked(gobj); if (ret) return ret; @@ -675,7 +676,7 @@ ast_dumb_mmap_offset(struct drm_file *file, bo = gem_to_ast_bo(obj); *offset = ast_bo_mmap_offset(bo); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return 0; diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index aaef0a652f10fdf145b272dc4f28782abfb8701a..6f3849ec0c1d0eb3fabc071c132b4ac7717f77bd 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -63,15 +63,18 @@ static inline void ast_load_palette_index(struct ast_private *ast, static void ast_crtc_load_lut(struct drm_crtc *crtc) { struct ast_private *ast = crtc->dev->dev_private; - struct ast_crtc *ast_crtc = to_ast_crtc(crtc); + u16 *r, *g, *b; int i; if (!crtc->enabled) return; + r = crtc->gamma_store; + g = r + crtc->gamma_size; + b = g + crtc->gamma_size; + for (i = 0; i < 256; i++) - ast_load_palette_index(ast, i, ast_crtc->lut_r[i], - ast_crtc->lut_g[i], ast_crtc->lut_b[i]); + ast_load_palette_index(ast, i, *r++ >> 8, *g++ >> 8, *b++ >> 8); } static bool ast_get_vbios_mode_info(struct drm_crtc *crtc, struct drm_display_mode *mode, @@ -613,7 +616,23 @@ static int ast_crtc_mode_set(struct drm_crtc *crtc, static void ast_crtc_disable(struct drm_crtc *crtc) { + int ret; + DRM_DEBUG_KMS("\n"); + ast_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); + if (crtc->primary->fb) { + struct ast_framebuffer *ast_fb = to_ast_framebuffer(crtc->primary->fb); + struct drm_gem_object *obj = ast_fb->obj; + struct ast_bo *bo = gem_to_ast_bo(obj); + + ret = ast_bo_reserve(bo, false); + if (ret) + return; + + ast_bo_push_sysram(bo); + ast_bo_unreserve(bo); + } + crtc->primary->fb = NULL; } static void ast_crtc_prepare(struct drm_crtc *crtc) @@ -633,7 +652,6 @@ static const struct drm_crtc_helper_funcs ast_crtc_helper_funcs = { .mode_set = ast_crtc_mode_set, .mode_set_base = ast_crtc_mode_set_base, .disable = ast_crtc_disable, - .load_lut = ast_crtc_load_lut, .prepare = ast_crtc_prepare, .commit = ast_crtc_commit, @@ -648,15 +666,6 @@ static int ast_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, uint32_t size, struct drm_modeset_acquire_ctx *ctx) { - struct ast_crtc *ast_crtc = to_ast_crtc(crtc); - int i; - - /* userspace palettes are always correct as is */ - for (i = 0; i < size; i++) { - ast_crtc->lut_r[i] = red[i] >> 8; - ast_crtc->lut_g[i] = green[i] >> 8; - ast_crtc->lut_b[i] = blue[i] >> 8; - } ast_crtc_load_lut(crtc); return 0; @@ -681,7 +690,6 @@ static const struct drm_crtc_funcs ast_crtc_funcs = { static int ast_crtc_init(struct drm_device *dev) { struct ast_crtc *crtc; - int i; crtc = kzalloc(sizeof(struct ast_crtc), GFP_KERNEL); if (!crtc) @@ -690,12 +698,6 @@ static int ast_crtc_init(struct drm_device *dev) drm_crtc_init(dev, &crtc->base, &ast_crtc_funcs); drm_mode_crtc_set_gamma_size(&crtc->base, 256); drm_crtc_helper_add(&crtc->base, &ast_crtc_helper_funcs); - - for (i = 0; i < 256; i++) { - crtc->lut_r[i] = i; - crtc->lut_g[i] = i; - crtc->lut_b[i] = i; - } return 0; } @@ -948,7 +950,7 @@ static void ast_cursor_fini(struct drm_device *dev) { struct ast_private *ast = dev->dev_private; ttm_bo_kunmap(&ast->cache_kmap); - drm_gem_object_unreference_unlocked(ast->cursor_cache); + drm_gem_object_put_unlocked(ast->cursor_cache); } int ast_mode_init(struct drm_device *dev) @@ -1213,10 +1215,10 @@ static int ast_cursor_set(struct drm_crtc *crtc, ast_show_cursor(crtc); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return 0; fail: - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return ret; } diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c index 58084985e6cfe2b0cf9d13d1d43b4f2407e543a6..696a15dc2f3f9965e775ea3579383502edff4b57 100644 --- a/drivers/gpu/drm/ast/ast_ttm.c +++ b/drivers/gpu/drm/ast/ast_ttm.c @@ -323,10 +323,8 @@ int ast_bo_create(struct drm_device *dev, int size, int align, return -ENOMEM; ret = drm_gem_object_init(dev, &astbo->gem, size); - if (ret) { - kfree(astbo); - return ret; - } + if (ret) + goto error; astbo->bo.bdev = &ast->ttm.bdev; @@ -340,10 +338,13 @@ int ast_bo_create(struct drm_device *dev, int size, int align, align >> PAGE_SHIFT, false, NULL, acc_size, NULL, NULL, ast_bo_ttm_destroy); if (ret) - return ret; + goto error; *pastbo = astbo; return 0; +error: + kfree(astbo); + return ret; } static inline u64 ast_bo_gpu_offset(struct ast_bo *bo) @@ -376,7 +377,7 @@ int ast_bo_pin(struct ast_bo *bo, u32 pl_flag, u64 *gpu_addr) int ast_bo_unpin(struct ast_bo *bo) { - int i, ret; + int i; if (!bo->pin_count) { DRM_ERROR("unpin bad %p\n", bo); return 0; @@ -387,11 +388,7 @@ int ast_bo_unpin(struct ast_bo *bo) for (i = 0; i < bo->placement.num_placement ; i++) bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; - ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); - if (ret) - return ret; - - return 0; + return ttm_bo_validate(&bo->bo, &bo->placement, false, false); } int ast_bo_push_sysram(struct ast_bo *bo) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index 53489859997b4991e4feafbac9a7360b9050a5ca..d73281095facabbaf7b54d6f309123f8b044821d 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -149,7 +149,8 @@ atmel_hlcdc_crtc_mode_valid(struct drm_crtc *c, return atmel_hlcdc_dc_mode_valid(crtc->dc, mode); } -static void atmel_hlcdc_crtc_disable(struct drm_crtc *c) +static void atmel_hlcdc_crtc_atomic_disable(struct drm_crtc *c, + struct drm_crtc_state *old_state) { struct drm_device *dev = c->dev; struct atmel_hlcdc_crtc *crtc = drm_crtc_to_atmel_hlcdc_crtc(c); @@ -183,7 +184,8 @@ static void atmel_hlcdc_crtc_disable(struct drm_crtc *c) pm_runtime_put_sync(dev->dev); } -static void atmel_hlcdc_crtc_enable(struct drm_crtc *c) +static void atmel_hlcdc_crtc_atomic_enable(struct drm_crtc *c, + struct drm_crtc_state *old_state) { struct drm_device *dev = c->dev; struct atmel_hlcdc_crtc *crtc = drm_crtc_to_atmel_hlcdc_crtc(c); @@ -235,7 +237,7 @@ static int atmel_hlcdc_crtc_select_output_mode(struct drm_crtc_state *state) crtc = drm_crtc_to_atmel_hlcdc_crtc(state->crtc); - for_each_connector_in_state(state->state, connector, cstate, i) { + for_each_new_connector_in_state(state->state, connector, cstate, i) { struct drm_display_info *info = &connector->display_info; unsigned int supported_fmts = 0; int j; @@ -319,11 +321,11 @@ static const struct drm_crtc_helper_funcs lcdc_crtc_helper_funcs = { .mode_set = drm_helper_crtc_mode_set, .mode_set_nofb = atmel_hlcdc_crtc_mode_set_nofb, .mode_set_base = drm_helper_crtc_mode_set_base, - .disable = atmel_hlcdc_crtc_disable, - .enable = atmel_hlcdc_crtc_enable, .atomic_check = atmel_hlcdc_crtc_atomic_check, .atomic_begin = atmel_hlcdc_crtc_atomic_begin, .atomic_flush = atmel_hlcdc_crtc_atomic_flush, + .atomic_enable = atmel_hlcdc_crtc_atomic_enable, + .atomic_disable = atmel_hlcdc_crtc_atomic_disable, }; static void atmel_hlcdc_crtc_destroy(struct drm_crtc *c) @@ -429,6 +431,7 @@ static const struct drm_crtc_funcs atmel_hlcdc_crtc_funcs = { .atomic_destroy_state = atmel_hlcdc_crtc_destroy_state, .enable_vblank = atmel_hlcdc_crtc_enable_vblank, .disable_vblank = atmel_hlcdc_crtc_disable_vblank, + .gamma_set = drm_atomic_helper_legacy_gamma_set, }; int atmel_hlcdc_crtc_create(struct drm_device *dev) @@ -484,6 +487,10 @@ int atmel_hlcdc_crtc_create(struct drm_device *dev) drm_crtc_helper_add(&crtc->base, &lcdc_crtc_helper_funcs); drm_crtc_vblank_reset(&crtc->base); + drm_mode_crtc_set_gamma_size(&crtc->base, ATMEL_HLCDC_CLUT_SIZE); + drm_crtc_enable_color_mgmt(&crtc->base, 0, false, + ATMEL_HLCDC_CLUT_SIZE); + dc->crtc = &crtc->base; return 0; diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c index 30dbffdb45a357bafb4f612d4887413933cdb317..74d66e11f68876c622e5e516aae3f266a2a037fd 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c @@ -42,6 +42,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_at91sam9n12_layers[] = { .default_color = 3, .general_config = 4, }, + .clut_offset = 0x400, }, }; @@ -73,6 +74,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_at91sam9x5_layers[] = { .disc_pos = 5, .disc_size = 6, }, + .clut_offset = 0x400, }, { .name = "overlay1", @@ -91,6 +93,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_at91sam9x5_layers[] = { .chroma_key_mask = 8, .general_config = 9, }, + .clut_offset = 0x800, }, { .name = "high-end-overlay", @@ -112,6 +115,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_at91sam9x5_layers[] = { .scaler_config = 13, .csc = 14, }, + .clut_offset = 0x1000, }, { .name = "cursor", @@ -131,6 +135,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_at91sam9x5_layers[] = { .chroma_key_mask = 8, .general_config = 9, }, + .clut_offset = 0x1400, }, }; @@ -162,6 +167,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_sama5d3_layers[] = { .disc_pos = 5, .disc_size = 6, }, + .clut_offset = 0x600, }, { .name = "overlay1", @@ -180,6 +186,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_sama5d3_layers[] = { .chroma_key_mask = 8, .general_config = 9, }, + .clut_offset = 0xa00, }, { .name = "overlay2", @@ -198,6 +205,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_sama5d3_layers[] = { .chroma_key_mask = 8, .general_config = 9, }, + .clut_offset = 0xe00, }, { .name = "high-end-overlay", @@ -223,6 +231,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_sama5d3_layers[] = { }, .csc = 14, }, + .clut_offset = 0x1200, }, { .name = "cursor", @@ -244,6 +253,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_sama5d3_layers[] = { .general_config = 9, .scaler_config = 13, }, + .clut_offset = 0x1600, }, }; @@ -275,6 +285,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_sama5d4_layers[] = { .disc_pos = 5, .disc_size = 6, }, + .clut_offset = 0x600, }, { .name = "overlay1", @@ -293,6 +304,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_sama5d4_layers[] = { .chroma_key_mask = 8, .general_config = 9, }, + .clut_offset = 0xa00, }, { .name = "overlay2", @@ -311,6 +323,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_sama5d4_layers[] = { .chroma_key_mask = 8, .general_config = 9, }, + .clut_offset = 0xe00, }, { .name = "high-end-overlay", @@ -336,6 +349,7 @@ static const struct atmel_hlcdc_layer_desc atmel_hlcdc_sama5d4_layers[] = { }, .csc = 14, }, + .clut_offset = 0x1200, }, }; @@ -451,8 +465,7 @@ static void atmel_hlcdc_fb_output_poll_changed(struct drm_device *dev) { struct atmel_hlcdc_dc *dc = dev->dev_private; - if (dc->fbdev) - drm_fbdev_cma_hotplug_event(dc->fbdev); + drm_fbdev_cma_hotplug_event(dc->fbdev); } struct atmel_hlcdc_dc_commit { @@ -526,14 +539,13 @@ static int atmel_hlcdc_dc_atomic_commit(struct drm_device *dev, dc->commit.pending = true; spin_unlock(&dc->commit.wait.lock); - if (ret) { - kfree(commit); - goto error; - } + if (ret) + goto err_free; - /* Swap the state, this is the point of no return. */ - drm_atomic_helper_swap_state(state, true); + /* We have our own synchronization through the commit lock. */ + BUG_ON(drm_atomic_helper_swap_state(state, false) < 0); + /* Swap state succeeded, this is the point of no return. */ drm_atomic_state_get(state); if (async) queue_work(dc->wq, &commit->work); @@ -542,6 +554,8 @@ static int atmel_hlcdc_dc_atomic_commit(struct drm_device *dev, return 0; +err_free: + kfree(commit); error: drm_atomic_helper_cleanup_planes(dev, state); return ret; @@ -747,8 +761,6 @@ static struct drm_driver atmel_hlcdc_dc_driver = { .gem_prime_vunmap = drm_gem_cma_prime_vunmap, .gem_prime_mmap = drm_gem_cma_prime_mmap, .dumb_create = drm_gem_cma_dumb_create, - .dumb_map_offset = drm_gem_cma_dumb_map_offset, - .dumb_destroy = drm_gem_dumb_destroy, .fops = &fops, .name = "atmel-hlcdc", .desc = "Atmel HLCD Controller DRM", diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h index b0596a84c1b881dd1b6d2f46a595c54a5dbc7807..4237b0446721ef925a6f3663823ea937c8cbfe47 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h @@ -88,6 +88,11 @@ #define ATMEL_HLCDC_YUV422SWP BIT(17) #define ATMEL_HLCDC_DSCALEOPT BIT(20) +#define ATMEL_HLCDC_C1_MODE ATMEL_HLCDC_CLUT_MODE(0) +#define ATMEL_HLCDC_C2_MODE ATMEL_HLCDC_CLUT_MODE(1) +#define ATMEL_HLCDC_C4_MODE ATMEL_HLCDC_CLUT_MODE(2) +#define ATMEL_HLCDC_C8_MODE ATMEL_HLCDC_CLUT_MODE(3) + #define ATMEL_HLCDC_XRGB4444_MODE ATMEL_HLCDC_RGB_MODE(0) #define ATMEL_HLCDC_ARGB4444_MODE ATMEL_HLCDC_RGB_MODE(1) #define ATMEL_HLCDC_RGBA4444_MODE ATMEL_HLCDC_RGB_MODE(2) @@ -142,6 +147,8 @@ #define ATMEL_HLCDC_DMA_CHANNEL_DSCR_DONE BIT(2) #define ATMEL_HLCDC_DMA_CHANNEL_DSCR_OVERRUN BIT(3) +#define ATMEL_HLCDC_CLUT_SIZE 256 + #define ATMEL_HLCDC_MAX_LAYERS 6 /** @@ -259,6 +266,7 @@ struct atmel_hlcdc_layer_desc { int id; int regs_offset; int cfgs_offset; + int clut_offset; struct atmel_hlcdc_formats *formats; struct atmel_hlcdc_layer_cfg_layout layout; int max_width; @@ -414,6 +422,14 @@ static inline u32 atmel_hlcdc_layer_read_cfg(struct atmel_hlcdc_layer *layer, (cfgid * sizeof(u32))); } +static inline void atmel_hlcdc_layer_write_clut(struct atmel_hlcdc_layer *layer, + unsigned int c, u32 val) +{ + regmap_write(layer->regmap, + layer->desc->clut_offset + c * sizeof(u32), + val); +} + static inline void atmel_hlcdc_layer_init(struct atmel_hlcdc_layer *layer, const struct atmel_hlcdc_layer_desc *desc, struct regmap *regmap) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index 1124200bb280d8f58eb59e5370aa91b7916bb134..703c2d13603fc8f6e8cbae7fb7e1ac7c8dafda8a 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -83,6 +83,7 @@ drm_plane_state_to_atmel_hlcdc_plane_state(struct drm_plane_state *s) #define SUBPIXEL_MASK 0xffff static uint32_t rgb_formats[] = { + DRM_FORMAT_C8, DRM_FORMAT_XRGB4444, DRM_FORMAT_ARGB4444, DRM_FORMAT_RGBA4444, @@ -100,6 +101,7 @@ struct atmel_hlcdc_formats atmel_hlcdc_plane_rgb_formats = { }; static uint32_t rgb_and_yuv_formats[] = { + DRM_FORMAT_C8, DRM_FORMAT_XRGB4444, DRM_FORMAT_ARGB4444, DRM_FORMAT_RGBA4444, @@ -128,6 +130,9 @@ struct atmel_hlcdc_formats atmel_hlcdc_plane_rgb_and_yuv_formats = { static int atmel_hlcdc_format_to_plane_mode(u32 format, u32 *mode) { switch (format) { + case DRM_FORMAT_C8: + *mode = ATMEL_HLCDC_C8_MODE; + break; case DRM_FORMAT_XRGB4444: *mode = ATMEL_HLCDC_XRGB4444_MODE; break; @@ -424,6 +429,29 @@ static void atmel_hlcdc_plane_update_format(struct atmel_hlcdc_plane *plane, ATMEL_HLCDC_LAYER_FORMAT_CFG, cfg); } +static void atmel_hlcdc_plane_update_clut(struct atmel_hlcdc_plane *plane) +{ + struct drm_crtc *crtc = plane->base.crtc; + struct drm_color_lut *lut; + int idx; + + if (!crtc || !crtc->state) + return; + + if (!crtc->state->color_mgmt_changed || !crtc->state->gamma_lut) + return; + + lut = (struct drm_color_lut *)crtc->state->gamma_lut->data; + + for (idx = 0; idx < ATMEL_HLCDC_CLUT_SIZE; idx++, lut++) { + u32 val = ((lut->red << 8) & 0xff0000) | + (lut->green & 0xff00) | + (lut->blue >> 8); + + atmel_hlcdc_layer_write_clut(&plane->layer, idx, val); + } +} + static void atmel_hlcdc_plane_update_buffers(struct atmel_hlcdc_plane *plane, struct atmel_hlcdc_plane_state *state) { @@ -768,6 +796,7 @@ static void atmel_hlcdc_plane_atomic_update(struct drm_plane *p, atmel_hlcdc_plane_update_pos_and_size(plane, state); atmel_hlcdc_plane_update_general_settings(plane, state); atmel_hlcdc_plane_update_format(plane, state); + atmel_hlcdc_plane_update_clut(plane); atmel_hlcdc_plane_update_buffers(plane, state); atmel_hlcdc_plane_update_disc_area(plane, state); @@ -809,7 +838,7 @@ static void atmel_hlcdc_plane_destroy(struct drm_plane *p) struct atmel_hlcdc_plane *plane = drm_plane_to_atmel_hlcdc_plane(p); if (plane->base.fb) - drm_framebuffer_unreference(plane->base.fb); + drm_framebuffer_put(plane->base.fb); drm_plane_cleanup(p); } @@ -911,7 +940,7 @@ void atmel_hlcdc_plane_irq(struct atmel_hlcdc_plane *plane) desc->name); } -static struct drm_plane_helper_funcs atmel_hlcdc_layer_plane_helper_funcs = { +static const struct drm_plane_helper_funcs atmel_hlcdc_layer_plane_helper_funcs = { .atomic_check = atmel_hlcdc_plane_atomic_check, .atomic_update = atmel_hlcdc_plane_atomic_update, .atomic_disable = atmel_hlcdc_plane_atomic_disable, @@ -958,7 +987,7 @@ static void atmel_hlcdc_plane_reset(struct drm_plane *p) state = drm_plane_state_to_atmel_hlcdc_plane_state(p->state); if (state->base.fb) - drm_framebuffer_unreference(state->base.fb); + drm_framebuffer_put(state->base.fb); kfree(state); p->state = NULL; @@ -996,7 +1025,7 @@ atmel_hlcdc_plane_atomic_duplicate_state(struct drm_plane *p) } if (copy->base.fb) - drm_framebuffer_reference(copy->base.fb); + drm_framebuffer_get(copy->base.fb); return ©->base; } @@ -1015,15 +1044,14 @@ static void atmel_hlcdc_plane_atomic_destroy_state(struct drm_plane *p, } if (s->fb) - drm_framebuffer_unreference(s->fb); + drm_framebuffer_put(s->fb); kfree(state); } -static struct drm_plane_funcs layer_plane_funcs = { +static const struct drm_plane_funcs layer_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, - .set_property = drm_atomic_helper_plane_set_property, .destroy = atmel_hlcdc_plane_destroy, .reset = atmel_hlcdc_plane_reset, .atomic_duplicate_state = atmel_hlcdc_plane_atomic_duplicate_state, @@ -1058,7 +1086,8 @@ static int atmel_hlcdc_plane_create(struct drm_device *dev, ret = drm_universal_plane_init(dev, &plane->base, 0, &layer_plane_funcs, desc->formats->formats, - desc->formats->nformats, type, NULL); + desc->formats->nformats, + NULL, type, NULL); if (ret) return ret; diff --git a/drivers/gpu/drm/bochs/bochs_drv.c b/drivers/gpu/drm/bochs/bochs_drv.c index aa342515ddf43c86f00b9082812305d96eef65e3..7b20318483e4d37e8a91489a721db2a60433f703 100644 --- a/drivers/gpu/drm/bochs/bochs_drv.c +++ b/drivers/gpu/drm/bochs/bochs_drv.c @@ -84,7 +84,6 @@ static struct drm_driver bochs_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET, .load = bochs_load, .unload = bochs_unload, - .set_busid = drm_pci_set_busid, .fops = &bochs_fops, .name = "bochs-drm", .desc = "bochs dispi vga interface (qemu stdvga)", @@ -94,7 +93,6 @@ static struct drm_driver bochs_driver = { .gem_free_object_unlocked = bochs_gem_free_object, .dumb_create = bochs_dumb_create, .dumb_map_offset = bochs_dumb_mmap_offset, - .dumb_destroy = drm_gem_dumb_destroy, }; /* ---------------------------------------------------------------------- */ @@ -224,12 +222,12 @@ static int __init bochs_init(void) if (bochs_modeset == 0) return -EINVAL; - return drm_pci_init(&bochs_driver, &bochs_pci_driver); + return pci_register_driver(&bochs_pci_driver); } static void __exit bochs_exit(void) { - drm_pci_exit(&bochs_driver, &bochs_pci_driver); + pci_unregister_driver(&bochs_pci_driver); } module_init(bochs_init); diff --git a/drivers/gpu/drm/bochs/bochs_fbdev.c b/drivers/gpu/drm/bochs/bochs_fbdev.c index c38deffa14de24054711ab96d28fbe30a17c8f35..14eb8d0d5a00a5f679219e7b101633678421af1b 100644 --- a/drivers/gpu/drm/bochs/bochs_fbdev.c +++ b/drivers/gpu/drm/bochs/bochs_fbdev.c @@ -23,9 +23,9 @@ static int bochsfb_mmap(struct fb_info *info, static struct fb_ops bochsfb_ops = { .owner = THIS_MODULE, DRM_FB_HELPER_DEFAULT_OPS, - .fb_fillrect = drm_fb_helper_sys_fillrect, - .fb_copyarea = drm_fb_helper_sys_copyarea, - .fb_imageblit = drm_fb_helper_sys_imageblit, + .fb_fillrect = drm_fb_helper_cfb_fillrect, + .fb_copyarea = drm_fb_helper_cfb_copyarea, + .fb_imageblit = drm_fb_helper_cfb_imageblit, .fb_mmap = bochsfb_mmap, }; @@ -118,7 +118,6 @@ static int bochsfb_create(struct drm_fb_helper *helper, strcpy(info->fix.id, "bochsdrmfb"); - info->flags = FBINFO_DEFAULT; info->fbops = &bochsfb_ops; drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth); diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index f75ab627811349beb55180a2c651e1c666da08a7..b2431aee788795cfa078dbf4b5515c75dbe1961c 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -785,8 +785,7 @@ adv7511_connector_detect(struct drm_connector *connector, bool force) return adv7511_detect(adv, connector); } -static struct drm_connector_funcs adv7511_connector_funcs = { - .dpms = drm_atomic_helper_connector_dpms, +static const struct drm_connector_funcs adv7511_connector_funcs = { .fill_modes = drm_helper_probe_single_connector_modes, .detect = adv7511_connector_detect, .destroy = drm_connector_cleanup, @@ -857,7 +856,7 @@ static int adv7511_bridge_attach(struct drm_bridge *bridge) return ret; } -static struct drm_bridge_funcs adv7511_bridge_funcs = { +static const struct drm_bridge_funcs adv7511_bridge_funcs = { .enable = adv7511_bridge_enable, .disable = adv7511_bridge_disable, .mode_set = adv7511_bridge_mode_set, @@ -1126,11 +1125,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) adv7511->bridge.funcs = &adv7511_bridge_funcs; adv7511->bridge.of_node = dev->of_node; - ret = drm_bridge_add(&adv7511->bridge); - if (ret) { - dev_err(dev, "failed to add adv7511 bridge\n"); - goto err_unregister_cec; - } + drm_bridge_add(&adv7511->bridge); adv7511_audio_init(dev, adv7511); diff --git a/drivers/gpu/drm/bridge/analogix-anx78xx.c b/drivers/gpu/drm/bridge/analogix-anx78xx.c index 9006578b9789b0dc248184b939289160929c274d..9385eb0b1ee40ca5441eb3b1876e04265d394bad 100644 --- a/drivers/gpu/drm/bridge/analogix-anx78xx.c +++ b/drivers/gpu/drm/bridge/analogix-anx78xx.c @@ -1002,7 +1002,6 @@ static enum drm_connector_status anx78xx_detect(struct drm_connector *connector, } static const struct drm_connector_funcs anx78xx_connector_funcs = { - .dpms = drm_atomic_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .detect = anx78xx_detect, .destroy = drm_connector_cleanup, @@ -1097,7 +1096,8 @@ static void anx78xx_bridge_mode_set(struct drm_bridge *bridge, mutex_lock(&anx78xx->lock); - err = drm_hdmi_avi_infoframe_from_display_mode(&frame, adjusted_mode); + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, adjusted_mode, + false); if (err) { DRM_ERROR("Failed to setup AVI infoframe: %d\n", err); goto unlock; @@ -1438,11 +1438,7 @@ static int anx78xx_i2c_probe(struct i2c_client *client, anx78xx->bridge.funcs = &anx78xx_bridge_funcs; - err = drm_bridge_add(&anx78xx->bridge); - if (err < 0) { - DRM_ERROR("Failed to add drm bridge: %d\n", err); - goto err_poweroff; - } + drm_bridge_add(&anx78xx->bridge); /* If cable is pulled out, just poweroff and wait for HPD event */ if (!gpiod_get_value(anx78xx->pdata.gpiod_hpd)) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 4c758ed51939d5a4fdacb9cacb93bab4f0953bb8..5dd3f1cd074a1d36f17022af23c7a23638eaa41e 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1005,7 +1005,6 @@ analogix_dp_detect(struct drm_connector *connector, bool force) } static const struct drm_connector_funcs analogix_dp_connector_funcs = { - .dpms = drm_atomic_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .detect = analogix_dp_detect, .destroy = drm_connector_cleanup, diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c index 831a606c4706815999a938966ae561ec10497103..de5e7dee7ad60f808f7d0380d0e44e827745204f 100644 --- a/drivers/gpu/drm/bridge/dumb-vga-dac.c +++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c @@ -92,7 +92,6 @@ dumb_vga_connector_detect(struct drm_connector *connector, bool force) } static const struct drm_connector_funcs dumb_vga_con_funcs = { - .dpms = drm_atomic_helper_connector_dpms, .detect = dumb_vga_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = drm_connector_cleanup, @@ -177,7 +176,6 @@ static struct i2c_adapter *dumb_vga_retrieve_ddc(struct device *dev) static int dumb_vga_probe(struct platform_device *pdev) { struct dumb_vga *vga; - int ret; vga = devm_kzalloc(&pdev->dev, sizeof(*vga), GFP_KERNEL); if (!vga) @@ -186,7 +184,7 @@ static int dumb_vga_probe(struct platform_device *pdev) vga->vdd = devm_regulator_get_optional(&pdev->dev, "vdd"); if (IS_ERR(vga->vdd)) { - ret = PTR_ERR(vga->vdd); + int ret = PTR_ERR(vga->vdd); if (ret == -EPROBE_DEFER) return -EPROBE_DEFER; vga->vdd = NULL; @@ -207,11 +205,9 @@ static int dumb_vga_probe(struct platform_device *pdev) vga->bridge.funcs = &dumb_vga_bridge_funcs; vga->bridge.of_node = pdev->dev.of_node; - ret = drm_bridge_add(&vga->bridge); - if (ret && !IS_ERR(vga->ddc)) - i2c_put_adapter(vga->ddc); + drm_bridge_add(&vga->bridge); - return ret; + return 0; } static int dumb_vga_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index 11f11086a68fd97d62b8d7febd1ca017de621ce9..7ccadba7c98cd30c3c81e3e0dc183cd050ab6d63 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -193,7 +193,6 @@ static enum drm_connector_status ge_b850v3_lvds_detect( } static const struct drm_connector_funcs ge_b850v3_lvds_connector_funcs = { - .dpms = drm_atomic_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .detect = ge_b850v3_lvds_detect, .destroy = drm_connector_cleanup, diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c index 4f64e717e01be4a4dd46bc2c3310f8615b71d2e6..d64a3283822ae7a877175aad01305d32b6d49bf5 100644 --- a/drivers/gpu/drm/bridge/nxp-ptn3460.c +++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c @@ -238,7 +238,6 @@ static const struct drm_connector_helper_funcs ptn3460_connector_helper_funcs = }; static const struct drm_connector_funcs ptn3460_connector_funcs = { - .dpms = drm_atomic_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = drm_connector_cleanup, .reset = drm_atomic_helper_connector_reset, @@ -332,11 +331,7 @@ static int ptn3460_probe(struct i2c_client *client, ptn_bridge->bridge.funcs = &ptn3460_bridge_funcs; ptn_bridge->bridge.of_node = dev->of_node; - ret = drm_bridge_add(&ptn_bridge->bridge); - if (ret) { - DRM_ERROR("Failed to add bridge\n"); - return ret; - } + drm_bridge_add(&ptn_bridge->bridge); i2c_set_clientdata(client, ptn_bridge); diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index 67fe19e5a9c6e65777c528e9a333e2c2247233fd..e0cca19b404406d4a22a4a40d871e8a8bbd5b2e0 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -50,7 +50,6 @@ panel_bridge_connector_helper_funcs = { }; static const struct drm_connector_funcs panel_bridge_connector_funcs = { - .dpms = drm_atomic_helper_connector_dpms, .reset = drm_atomic_helper_connector_reset, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = drm_connector_cleanup, @@ -158,7 +157,6 @@ struct drm_bridge *drm_panel_bridge_add(struct drm_panel *panel, u32 connector_type) { struct panel_bridge *panel_bridge; - int ret; if (!panel) return ERR_PTR(-EINVAL); @@ -176,9 +174,7 @@ struct drm_bridge *drm_panel_bridge_add(struct drm_panel *panel, panel_bridge->bridge.of_node = panel->dev->of_node; #endif - ret = drm_bridge_add(&panel_bridge->bridge); - if (ret) - return ERR_PTR(ret); + drm_bridge_add(&panel_bridge->bridge); return &panel_bridge->bridge; } @@ -198,3 +194,33 @@ void drm_panel_bridge_remove(struct drm_bridge *bridge) devm_kfree(panel_bridge->panel->dev, bridge); } EXPORT_SYMBOL(drm_panel_bridge_remove); + +static void devm_drm_panel_bridge_release(struct device *dev, void *res) +{ + struct drm_bridge **bridge = res; + + drm_panel_bridge_remove(*bridge); +} + +struct drm_bridge *devm_drm_panel_bridge_add(struct device *dev, + struct drm_panel *panel, + u32 connector_type) +{ + struct drm_bridge **ptr, *bridge; + + ptr = devres_alloc(devm_drm_panel_bridge_release, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + bridge = drm_panel_bridge_add(panel, connector_type); + if (!IS_ERR(bridge)) { + *ptr = bridge; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return bridge; +} +EXPORT_SYMBOL(devm_drm_panel_bridge_add); diff --git a/drivers/gpu/drm/bridge/parade-ps8622.c b/drivers/gpu/drm/bridge/parade-ps8622.c index 6f22f9fec9bfed145e6fd781c715287c97f1d682..81198f5e9afacf91bf2301652a2ac4671030f7cf 100644 --- a/drivers/gpu/drm/bridge/parade-ps8622.c +++ b/drivers/gpu/drm/bridge/parade-ps8622.c @@ -476,7 +476,6 @@ static const struct drm_connector_helper_funcs ps8622_connector_helper_funcs = { }; static const struct drm_connector_funcs ps8622_connector_funcs = { - .dpms = drm_atomic_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = drm_connector_cleanup, .reset = drm_atomic_helper_connector_reset, @@ -598,11 +597,7 @@ static int ps8622_probe(struct i2c_client *client, ps8622->bridge.funcs = &ps8622_bridge_funcs; ps8622->bridge.of_node = dev->of_node; - ret = drm_bridge_add(&ps8622->bridge); - if (ret) { - DRM_ERROR("Failed to add bridge\n"); - return ret; - } + drm_bridge_add(&ps8622->bridge); i2c_set_clientdata(client, ps8622); diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 9b87067c022cd8f541d94310dd5266ab84cf7c9d..b1ab4ab09532b49901bdd7126f951c40a10424d5 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -124,7 +124,6 @@ sii902x_connector_detect(struct drm_connector *connector, bool force) } static const struct drm_connector_funcs sii902x_connector_funcs = { - .dpms = drm_atomic_helper_connector_dpms, .detect = sii902x_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = drm_connector_cleanup, @@ -269,7 +268,7 @@ static void sii902x_bridge_mode_set(struct drm_bridge *bridge, if (ret) return; - ret = drm_hdmi_avi_infoframe_from_display_mode(&frame, adj); + ret = drm_hdmi_avi_infoframe_from_display_mode(&frame, adj, false); if (ret < 0) { DRM_ERROR("couldn't fill AVI infoframe\n"); return; @@ -418,11 +417,7 @@ static int sii902x_probe(struct i2c_client *client, sii902x->bridge.funcs = &sii902x_bridge_funcs; sii902x->bridge.of_node = dev->of_node; - ret = drm_bridge_add(&sii902x->bridge); - if (ret) { - dev_err(dev, "Failed to add drm_bridge\n"); - return ret; - } + drm_bridge_add(&sii902x->bridge); i2c_set_clientdata(client, sii902x); diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 2d51a2269fc610ffc705abcb7a49c1bdcaffbae9..5131bfb94f065ceb20ba61713f990b76f11103cb 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -597,9 +597,9 @@ static void sii8620_mt_read_devcap(struct sii8620 *ctx, bool xdevcap) static void sii8620_mt_read_devcap_reg_recv(struct sii8620 *ctx, struct sii8620_mt_msg *msg) { - u8 reg = msg->reg[0] & 0x7f; + u8 reg = msg->reg[1] & 0x7f; - if (msg->reg[0] & 0x80) + if (msg->reg[1] & 0x80) ctx->xdevcap[reg] = msg->ret; else ctx->devcap[reg] = msg->ret; diff --git a/drivers/gpu/drm/bridge/synopsys/Kconfig b/drivers/gpu/drm/bridge/synopsys/Kconfig index 53e78d092d18e3b9085d0784772639f8fa01f21c..3cc53b44186e2d3e3eae3fb7ad6bf0379254b319 100644 --- a/drivers/gpu/drm/bridge/synopsys/Kconfig +++ b/drivers/gpu/drm/bridge/synopsys/Kconfig @@ -2,6 +2,7 @@ config DRM_DW_HDMI tristate select DRM_KMS_HELPER select REGMAP_MMIO + select CEC_CORE if CEC_NOTIFIER config DRM_DW_HDMI_AHB_AUDIO tristate "Synopsys Designware AHB Audio interface" @@ -22,3 +23,18 @@ config DRM_DW_HDMI_I2S_AUDIO help Support the I2S Audio interface which is part of the Synopsys Designware HDMI block. + +config DRM_DW_HDMI_CEC + tristate "Synopsis Designware CEC interface" + depends on DRM_DW_HDMI + select CEC_CORE + select CEC_NOTIFIER + help + Support the CE interface which is part of the Synopsys + Designware HDMI block. + +config DRM_DW_MIPI_DSI + tristate + select DRM_KMS_HELPER + select DRM_MIPI_DSI + select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/bridge/synopsys/Makefile b/drivers/gpu/drm/bridge/synopsys/Makefile index 17aa7a65b57e677e06d4b20dfeb23adb23ec0941..5dad97d920be4842da57d90e78f1f79e5f2f8470 100644 --- a/drivers/gpu/drm/bridge/synopsys/Makefile +++ b/drivers/gpu/drm/bridge/synopsys/Makefile @@ -3,3 +3,6 @@ obj-$(CONFIG_DRM_DW_HDMI) += dw-hdmi.o obj-$(CONFIG_DRM_DW_HDMI_AHB_AUDIO) += dw-hdmi-ahb-audio.o obj-$(CONFIG_DRM_DW_HDMI_I2S_AUDIO) += dw-hdmi-i2s-audio.o +obj-$(CONFIG_DRM_DW_HDMI_CEC) += dw-hdmi-cec.o + +obj-$(CONFIG_DRM_DW_MIPI_DSI) += dw-mipi-dsi.o diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c index 8f2d1379c8809b8b8c428fb521c58bf1282d7c52..cf3f0caf9c632fe47dd5f48999031100e12cf68c 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c @@ -517,7 +517,7 @@ static snd_pcm_uframes_t dw_hdmi_pointer(struct snd_pcm_substream *substream) return bytes_to_frames(runtime, dw->buf_offset); } -static struct snd_pcm_ops snd_dw_hdmi_ops = { +static const struct snd_pcm_ops snd_dw_hdmi_ops = { .open = dw_hdmi_open, .close = dw_hdmi_close, .ioctl = snd_pcm_lib_ioctl, diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c new file mode 100644 index 0000000000000000000000000000000000000000..6c323510f1288c13897e60ba9205674e64cff6aa --- /dev/null +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.c @@ -0,0 +1,327 @@ +/* + * Designware HDMI CEC driver + * + * Copyright (C) 2015-2017 Russell King. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "dw-hdmi-cec.h" + +enum { + HDMI_IH_CEC_STAT0 = 0x0106, + HDMI_IH_MUTE_CEC_STAT0 = 0x0186, + + HDMI_CEC_CTRL = 0x7d00, + CEC_CTRL_START = BIT(0), + CEC_CTRL_FRAME_TYP = 3 << 1, + CEC_CTRL_RETRY = 0 << 1, + CEC_CTRL_NORMAL = 1 << 1, + CEC_CTRL_IMMED = 2 << 1, + + HDMI_CEC_STAT = 0x7d01, + CEC_STAT_DONE = BIT(0), + CEC_STAT_EOM = BIT(1), + CEC_STAT_NACK = BIT(2), + CEC_STAT_ARBLOST = BIT(3), + CEC_STAT_ERROR_INIT = BIT(4), + CEC_STAT_ERROR_FOLL = BIT(5), + CEC_STAT_WAKEUP = BIT(6), + + HDMI_CEC_MASK = 0x7d02, + HDMI_CEC_POLARITY = 0x7d03, + HDMI_CEC_INT = 0x7d04, + HDMI_CEC_ADDR_L = 0x7d05, + HDMI_CEC_ADDR_H = 0x7d06, + HDMI_CEC_TX_CNT = 0x7d07, + HDMI_CEC_RX_CNT = 0x7d08, + HDMI_CEC_TX_DATA0 = 0x7d10, + HDMI_CEC_RX_DATA0 = 0x7d20, + HDMI_CEC_LOCK = 0x7d30, + HDMI_CEC_WKUPCTRL = 0x7d31, +}; + +struct dw_hdmi_cec { + struct dw_hdmi *hdmi; + const struct dw_hdmi_cec_ops *ops; + u32 addresses; + struct cec_adapter *adap; + struct cec_msg rx_msg; + unsigned int tx_status; + bool tx_done; + bool rx_done; + struct cec_notifier *notify; + int irq; +}; + +static void dw_hdmi_write(struct dw_hdmi_cec *cec, u8 val, int offset) +{ + cec->ops->write(cec->hdmi, val, offset); +} + +static u8 dw_hdmi_read(struct dw_hdmi_cec *cec, int offset) +{ + return cec->ops->read(cec->hdmi, offset); +} + +static int dw_hdmi_cec_log_addr(struct cec_adapter *adap, u8 logical_addr) +{ + struct dw_hdmi_cec *cec = cec_get_drvdata(adap); + + if (logical_addr == CEC_LOG_ADDR_INVALID) + cec->addresses = 0; + else + cec->addresses |= BIT(logical_addr) | BIT(15); + + dw_hdmi_write(cec, cec->addresses & 255, HDMI_CEC_ADDR_L); + dw_hdmi_write(cec, cec->addresses >> 8, HDMI_CEC_ADDR_H); + + return 0; +} + +static int dw_hdmi_cec_transmit(struct cec_adapter *adap, u8 attempts, + u32 signal_free_time, struct cec_msg *msg) +{ + struct dw_hdmi_cec *cec = cec_get_drvdata(adap); + unsigned int i, ctrl; + + switch (signal_free_time) { + case CEC_SIGNAL_FREE_TIME_RETRY: + ctrl = CEC_CTRL_RETRY; + break; + case CEC_SIGNAL_FREE_TIME_NEW_INITIATOR: + default: + ctrl = CEC_CTRL_NORMAL; + break; + case CEC_SIGNAL_FREE_TIME_NEXT_XFER: + ctrl = CEC_CTRL_IMMED; + break; + } + + for (i = 0; i < msg->len; i++) + dw_hdmi_write(cec, msg->msg[i], HDMI_CEC_TX_DATA0 + i); + + dw_hdmi_write(cec, msg->len, HDMI_CEC_TX_CNT); + dw_hdmi_write(cec, ctrl | CEC_CTRL_START, HDMI_CEC_CTRL); + + return 0; +} + +static irqreturn_t dw_hdmi_cec_hardirq(int irq, void *data) +{ + struct cec_adapter *adap = data; + struct dw_hdmi_cec *cec = cec_get_drvdata(adap); + unsigned int stat = dw_hdmi_read(cec, HDMI_IH_CEC_STAT0); + irqreturn_t ret = IRQ_HANDLED; + + if (stat == 0) + return IRQ_NONE; + + dw_hdmi_write(cec, stat, HDMI_IH_CEC_STAT0); + + if (stat & CEC_STAT_ERROR_INIT) { + cec->tx_status = CEC_TX_STATUS_ERROR; + cec->tx_done = true; + ret = IRQ_WAKE_THREAD; + } else if (stat & CEC_STAT_DONE) { + cec->tx_status = CEC_TX_STATUS_OK; + cec->tx_done = true; + ret = IRQ_WAKE_THREAD; + } else if (stat & CEC_STAT_NACK) { + cec->tx_status = CEC_TX_STATUS_NACK; + cec->tx_done = true; + ret = IRQ_WAKE_THREAD; + } + + if (stat & CEC_STAT_EOM) { + unsigned int len, i; + + len = dw_hdmi_read(cec, HDMI_CEC_RX_CNT); + if (len > sizeof(cec->rx_msg.msg)) + len = sizeof(cec->rx_msg.msg); + + for (i = 0; i < len; i++) + cec->rx_msg.msg[i] = + dw_hdmi_read(cec, HDMI_CEC_RX_DATA0 + i); + + dw_hdmi_write(cec, 0, HDMI_CEC_LOCK); + + cec->rx_msg.len = len; + smp_wmb(); + cec->rx_done = true; + + ret = IRQ_WAKE_THREAD; + } + + return ret; +} + +static irqreturn_t dw_hdmi_cec_thread(int irq, void *data) +{ + struct cec_adapter *adap = data; + struct dw_hdmi_cec *cec = cec_get_drvdata(adap); + + if (cec->tx_done) { + cec->tx_done = false; + cec_transmit_attempt_done(adap, cec->tx_status); + } + if (cec->rx_done) { + cec->rx_done = false; + smp_rmb(); + cec_received_msg(adap, &cec->rx_msg); + } + return IRQ_HANDLED; +} + +static int dw_hdmi_cec_enable(struct cec_adapter *adap, bool enable) +{ + struct dw_hdmi_cec *cec = cec_get_drvdata(adap); + + if (!enable) { + dw_hdmi_write(cec, ~0, HDMI_CEC_MASK); + dw_hdmi_write(cec, ~0, HDMI_IH_MUTE_CEC_STAT0); + dw_hdmi_write(cec, 0, HDMI_CEC_POLARITY); + + cec->ops->disable(cec->hdmi); + } else { + unsigned int irqs; + + dw_hdmi_write(cec, 0, HDMI_CEC_CTRL); + dw_hdmi_write(cec, ~0, HDMI_IH_CEC_STAT0); + dw_hdmi_write(cec, 0, HDMI_CEC_LOCK); + + dw_hdmi_cec_log_addr(cec->adap, CEC_LOG_ADDR_INVALID); + + cec->ops->enable(cec->hdmi); + + irqs = CEC_STAT_ERROR_INIT | CEC_STAT_NACK | CEC_STAT_EOM | + CEC_STAT_DONE; + dw_hdmi_write(cec, irqs, HDMI_CEC_POLARITY); + dw_hdmi_write(cec, ~irqs, HDMI_CEC_MASK); + dw_hdmi_write(cec, ~irqs, HDMI_IH_MUTE_CEC_STAT0); + } + return 0; +} + +static const struct cec_adap_ops dw_hdmi_cec_ops = { + .adap_enable = dw_hdmi_cec_enable, + .adap_log_addr = dw_hdmi_cec_log_addr, + .adap_transmit = dw_hdmi_cec_transmit, +}; + +static void dw_hdmi_cec_del(void *data) +{ + struct dw_hdmi_cec *cec = data; + + cec_delete_adapter(cec->adap); +} + +static int dw_hdmi_cec_probe(struct platform_device *pdev) +{ + struct dw_hdmi_cec_data *data = dev_get_platdata(&pdev->dev); + struct dw_hdmi_cec *cec; + int ret; + + if (!data) + return -ENXIO; + + /* + * Our device is just a convenience - we want to link to the real + * hardware device here, so that userspace can see the association + * between the HDMI hardware and its associated CEC chardev. + */ + cec = devm_kzalloc(&pdev->dev, sizeof(*cec), GFP_KERNEL); + if (!cec) + return -ENOMEM; + + cec->irq = data->irq; + cec->ops = data->ops; + cec->hdmi = data->hdmi; + + platform_set_drvdata(pdev, cec); + + dw_hdmi_write(cec, 0, HDMI_CEC_TX_CNT); + dw_hdmi_write(cec, ~0, HDMI_CEC_MASK); + dw_hdmi_write(cec, ~0, HDMI_IH_MUTE_CEC_STAT0); + dw_hdmi_write(cec, 0, HDMI_CEC_POLARITY); + + cec->adap = cec_allocate_adapter(&dw_hdmi_cec_ops, cec, "dw_hdmi", + CEC_CAP_LOG_ADDRS | CEC_CAP_TRANSMIT | + CEC_CAP_RC | CEC_CAP_PASSTHROUGH, + CEC_MAX_LOG_ADDRS); + if (IS_ERR(cec->adap)) + return PTR_ERR(cec->adap); + + /* override the module pointer */ + cec->adap->owner = THIS_MODULE; + + ret = devm_add_action(&pdev->dev, dw_hdmi_cec_del, cec); + if (ret) { + cec_delete_adapter(cec->adap); + return ret; + } + + ret = devm_request_threaded_irq(&pdev->dev, cec->irq, + dw_hdmi_cec_hardirq, + dw_hdmi_cec_thread, IRQF_SHARED, + "dw-hdmi-cec", cec->adap); + if (ret < 0) + return ret; + + cec->notify = cec_notifier_get(pdev->dev.parent); + if (!cec->notify) + return -ENOMEM; + + ret = cec_register_adapter(cec->adap, pdev->dev.parent); + if (ret < 0) { + cec_notifier_put(cec->notify); + return ret; + } + + /* + * CEC documentation says we must not call cec_delete_adapter + * after a successful call to cec_register_adapter(). + */ + devm_remove_action(&pdev->dev, dw_hdmi_cec_del, cec); + + cec_register_cec_notifier(cec->adap, cec->notify); + + return 0; +} + +static int dw_hdmi_cec_remove(struct platform_device *pdev) +{ + struct dw_hdmi_cec *cec = platform_get_drvdata(pdev); + + cec_unregister_adapter(cec->adap); + cec_notifier_put(cec->notify); + + return 0; +} + +static struct platform_driver dw_hdmi_cec_driver = { + .probe = dw_hdmi_cec_probe, + .remove = dw_hdmi_cec_remove, + .driver = { + .name = "dw-hdmi-cec", + }, +}; +module_platform_driver(dw_hdmi_cec_driver); + +MODULE_AUTHOR("Russell King "); +MODULE_DESCRIPTION("Synopsys Designware HDMI CEC driver for i.MX"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS(PLATFORM_MODULE_PREFIX "dw-hdmi-cec"); diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.h new file mode 100644 index 0000000000000000000000000000000000000000..cf4dc121a2c43cb805ad7bd27edc68a515b1b1e6 --- /dev/null +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.h @@ -0,0 +1,19 @@ +#ifndef DW_HDMI_CEC_H +#define DW_HDMI_CEC_H + +struct dw_hdmi; + +struct dw_hdmi_cec_ops { + void (*write)(struct dw_hdmi *hdmi, u8 val, int offset); + u8 (*read)(struct dw_hdmi *hdmi, int offset); + void (*enable)(struct dw_hdmi *hdmi); + void (*disable)(struct dw_hdmi *hdmi); +}; + +struct dw_hdmi_cec_data { + struct dw_hdmi *hdmi; + const struct dw_hdmi_cec_ops *ops; + int irq; +}; + +#endif diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c index b2cf59f54c889e528b1a7c7bc6371c899594bd20..3b7e5c59a5e9941df113ff53f4c98664232b5054 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c @@ -1,7 +1,8 @@ /* * dw-hdmi-i2s-audio.c * - * Copyright (c) 2016 Kuninori Morimoto + * Copyright (c) 2017 Renesas Solutions Corp. + * Kuninori Morimoto * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index ead11242c4b91e2fd0f2c2da7a68ff0e72025b3c..bf14214fa4640279fa46b655333198ed5aa1446e 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -35,8 +35,12 @@ #include "dw-hdmi.h" #include "dw-hdmi-audio.h" +#include "dw-hdmi-cec.h" + +#include #define DDC_SEGMENT_ADDR 0x30 + #define HDMI_EDID_LEN 512 enum hdmi_datamap { @@ -130,6 +134,7 @@ struct dw_hdmi { unsigned int version; struct platform_device *audio; + struct platform_device *cec; struct device *dev; struct clk *isfr_clk; struct clk *iahb_clk; @@ -163,6 +168,7 @@ struct dw_hdmi { bool bridge_is_on; /* indicates the bridge is on */ bool rxsense; /* rxsense state */ u8 phy_mask; /* desired phy int mask settings */ + u8 mc_clkdis; /* clock disable register */ spinlock_t audio_lock; struct mutex audio_mutex; @@ -175,6 +181,8 @@ struct dw_hdmi { struct regmap *regm; void (*enable_audio)(struct dw_hdmi *hdmi); void (*disable_audio)(struct dw_hdmi *hdmi); + + struct cec_notifier *cec_notifier; }; #define HDMI_IH_PHY_STAT0_RX_SENSE \ @@ -546,8 +554,11 @@ EXPORT_SYMBOL_GPL(dw_hdmi_set_sample_rate); static void hdmi_enable_audio_clk(struct dw_hdmi *hdmi, bool enable) { - hdmi_modb(hdmi, enable ? 0 : HDMI_MC_CLKDIS_AUDCLK_DISABLE, - HDMI_MC_CLKDIS_AUDCLK_DISABLE, HDMI_MC_CLKDIS); + if (enable) + hdmi->mc_clkdis &= ~HDMI_MC_CLKDIS_AUDCLK_DISABLE; + else + hdmi->mc_clkdis |= HDMI_MC_CLKDIS_AUDCLK_DISABLE; + hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS); } static void dw_hdmi_ahb_audio_enable(struct dw_hdmi *hdmi) @@ -1317,7 +1328,7 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, struct drm_display_mode *mode) u8 val; /* Initialise info frame from DRM mode */ - drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); + drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false); if (hdmi_bus_fmt_is_yuv444(hdmi->hdmi_data.enc_out_bus_format)) frame.colorspace = HDMI_COLORSPACE_YUV444; @@ -1569,8 +1580,6 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, /* HDMI Initialization Step B.4 */ static void dw_hdmi_enable_video_path(struct dw_hdmi *hdmi) { - u8 clkdis; - /* control period minimum duration */ hdmi_writeb(hdmi, 12, HDMI_FC_CTRLDUR); hdmi_writeb(hdmi, 32, HDMI_FC_EXCTRLDUR); @@ -1582,17 +1591,21 @@ static void dw_hdmi_enable_video_path(struct dw_hdmi *hdmi) hdmi_writeb(hdmi, 0x21, HDMI_FC_CH2PREAM); /* Enable pixel clock and tmds data path */ - clkdis = 0x7F; - clkdis &= ~HDMI_MC_CLKDIS_PIXELCLK_DISABLE; - hdmi_writeb(hdmi, clkdis, HDMI_MC_CLKDIS); + hdmi->mc_clkdis |= HDMI_MC_CLKDIS_HDCPCLK_DISABLE | + HDMI_MC_CLKDIS_CSCCLK_DISABLE | + HDMI_MC_CLKDIS_AUDCLK_DISABLE | + HDMI_MC_CLKDIS_PREPCLK_DISABLE | + HDMI_MC_CLKDIS_TMDSCLK_DISABLE; + hdmi->mc_clkdis &= ~HDMI_MC_CLKDIS_PIXELCLK_DISABLE; + hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS); - clkdis &= ~HDMI_MC_CLKDIS_TMDSCLK_DISABLE; - hdmi_writeb(hdmi, clkdis, HDMI_MC_CLKDIS); + hdmi->mc_clkdis &= ~HDMI_MC_CLKDIS_TMDSCLK_DISABLE; + hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS); /* Enable csc path */ if (is_color_space_conversion(hdmi)) { - clkdis &= ~HDMI_MC_CLKDIS_CSCCLK_DISABLE; - hdmi_writeb(hdmi, clkdis, HDMI_MC_CLKDIS); + hdmi->mc_clkdis &= ~HDMI_MC_CLKDIS_CSCCLK_DISABLE; + hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS); } /* Enable color space conversion if needed */ @@ -1783,7 +1796,6 @@ static void initialize_hdmi_ih_mutes(struct dw_hdmi *hdmi) hdmi_writeb(hdmi, 0xff, HDMI_AUD_HBR_MASK); hdmi_writeb(hdmi, 0xff, HDMI_GP_MASK); hdmi_writeb(hdmi, 0xff, HDMI_A_APIINTMSK); - hdmi_writeb(hdmi, 0xff, HDMI_CEC_MASK); hdmi_writeb(hdmi, 0xff, HDMI_I2CM_INT); hdmi_writeb(hdmi, 0xff, HDMI_I2CM_CTLINT); @@ -1896,6 +1908,7 @@ static int dw_hdmi_connector_get_modes(struct drm_connector *connector) hdmi->sink_is_hdmi = drm_detect_hdmi_monitor(edid); hdmi->sink_has_audio = drm_detect_monitor_audio(edid); drm_mode_connector_update_edid_property(connector, edid); + cec_notifier_set_phys_addr_from_edid(hdmi->cec_notifier, edid); ret = drm_add_edid_modes(connector, edid); /* Store the ELD */ drm_edid_to_eld(connector, edid); @@ -1920,7 +1933,6 @@ static void dw_hdmi_connector_force(struct drm_connector *connector) } static const struct drm_connector_funcs dw_hdmi_connector_funcs = { - .dpms = drm_atomic_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .detect = dw_hdmi_connector_detect, .destroy = drm_connector_cleanup, @@ -2119,11 +2131,16 @@ static irqreturn_t dw_hdmi_irq(int irq, void *dev_id) * ask the source to re-read the EDID. */ if (intr_stat & - (HDMI_IH_PHY_STAT0_RX_SENSE | HDMI_IH_PHY_STAT0_HPD)) + (HDMI_IH_PHY_STAT0_RX_SENSE | HDMI_IH_PHY_STAT0_HPD)) { __dw_hdmi_setup_rx_sense(hdmi, phy_stat & HDMI_PHY_HPD, phy_stat & HDMI_PHY_RX_SENSE); + if ((phy_stat & (HDMI_PHY_RX_SENSE | HDMI_PHY_HPD)) == 0) + cec_notifier_set_phys_addr(hdmi->cec_notifier, + CEC_PHYS_ADDR_INVALID); + } + if (intr_stat & HDMI_IH_PHY_STAT0_HPD) { dev_dbg(hdmi->dev, "EVENT=%s\n", phy_int_pol & HDMI_PHY_HPD ? "plugin" : "plugout"); @@ -2170,6 +2187,7 @@ static const struct dw_hdmi_phy_data dw_hdmi_phys[] = { .name = "DWC HDMI 2.0 TX PHY", .gen = 2, .has_svsret = true, + .configure = hdmi_phy_configure_dwc_hdmi_3d_tx, }, { .type = DW_HDMI_PHY_VENDOR_PHY, .name = "Vendor PHY", @@ -2219,6 +2237,29 @@ static int dw_hdmi_detect_phy(struct dw_hdmi *hdmi) return -ENODEV; } +static void dw_hdmi_cec_enable(struct dw_hdmi *hdmi) +{ + mutex_lock(&hdmi->mutex); + hdmi->mc_clkdis &= ~HDMI_MC_CLKDIS_CECCLK_DISABLE; + hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS); + mutex_unlock(&hdmi->mutex); +} + +static void dw_hdmi_cec_disable(struct dw_hdmi *hdmi) +{ + mutex_lock(&hdmi->mutex); + hdmi->mc_clkdis |= HDMI_MC_CLKDIS_CECCLK_DISABLE; + hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS); + mutex_unlock(&hdmi->mutex); +} + +static const struct dw_hdmi_cec_ops dw_hdmi_cec_ops = { + .write = hdmi_writeb, + .read = hdmi_readb, + .enable = dw_hdmi_cec_enable, + .disable = dw_hdmi_cec_disable, +}; + static const struct regmap_config hdmi_regmap_8bit_config = { .reg_bits = 32, .val_bits = 8, @@ -2241,6 +2282,7 @@ __dw_hdmi_probe(struct platform_device *pdev, struct device_node *np = dev->of_node; struct platform_device_info pdevinfo; struct device_node *ddc_node; + struct dw_hdmi_cec_data cec; struct dw_hdmi *hdmi; struct resource *iores = NULL; int irq; @@ -2261,6 +2303,7 @@ __dw_hdmi_probe(struct platform_device *pdev, hdmi->disabled = true; hdmi->rxsense = true; hdmi->phy_mask = (u8)~(HDMI_PHY_HPD | HDMI_PHY_RX_SENSE); + hdmi->mc_clkdis = 0x7f; mutex_init(&hdmi->mutex); mutex_init(&hdmi->audio_mutex); @@ -2376,6 +2419,12 @@ __dw_hdmi_probe(struct platform_device *pdev, if (ret) goto err_iahb; + hdmi->cec_notifier = cec_notifier_get(dev); + if (!hdmi->cec_notifier) { + ret = -ENOMEM; + goto err_iahb; + } + /* * To prevent overflows in HDMI_IH_FC_STAT2, set the clk regenerator * N and cts values before enabling phy @@ -2438,6 +2487,19 @@ __dw_hdmi_probe(struct platform_device *pdev, hdmi->audio = platform_device_register_full(&pdevinfo); } + if (config0 & HDMI_CONFIG0_CEC) { + cec.hdmi = hdmi; + cec.ops = &dw_hdmi_cec_ops; + cec.irq = irq; + + pdevinfo.name = "dw-hdmi-cec"; + pdevinfo.data = &cec; + pdevinfo.size_data = sizeof(cec); + pdevinfo.dma_mask = 0; + + hdmi->cec = platform_device_register_full(&pdevinfo); + } + /* Reset HDMI DDC I2C master controller and mute I2CM interrupts */ if (hdmi->i2c) dw_hdmi_i2c_init(hdmi); @@ -2452,6 +2514,9 @@ __dw_hdmi_probe(struct platform_device *pdev, hdmi->ddc = NULL; } + if (hdmi->cec_notifier) + cec_notifier_put(hdmi->cec_notifier); + clk_disable_unprepare(hdmi->iahb_clk); err_isfr: clk_disable_unprepare(hdmi->isfr_clk); @@ -2465,10 +2530,15 @@ static void __dw_hdmi_remove(struct dw_hdmi *hdmi) { if (hdmi->audio && !IS_ERR(hdmi->audio)) platform_device_unregister(hdmi->audio); + if (!IS_ERR(hdmi->cec)) + platform_device_unregister(hdmi->cec); /* Disable all interrupts */ hdmi_writeb(hdmi, ~0, HDMI_IH_MUTE_PHY_STAT0); + if (hdmi->cec_notifier) + cec_notifier_put(hdmi->cec_notifier); + clk_disable_unprepare(hdmi->iahb_clk); clk_disable_unprepare(hdmi->isfr_clk); @@ -2485,17 +2555,12 @@ int dw_hdmi_probe(struct platform_device *pdev, const struct dw_hdmi_plat_data *plat_data) { struct dw_hdmi *hdmi; - int ret; hdmi = __dw_hdmi_probe(pdev, plat_data); if (IS_ERR(hdmi)) return PTR_ERR(hdmi); - ret = drm_bridge_add(&hdmi->bridge); - if (ret < 0) { - __dw_hdmi_remove(hdmi); - return ret; - } + drm_bridge_add(&hdmi->bridge); return 0; } diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h index c59f87e1483eadc7a8ecd4dcee4345514fed3668..9d90eb9c46e5910165c6e36b39c62e5ce9150c2e 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h @@ -478,51 +478,6 @@ #define HDMI_A_PRESETUP 0x501A #define HDMI_A_SRM_BASE 0x5020 -/* CEC Engine Registers */ -#define HDMI_CEC_CTRL 0x7D00 -#define HDMI_CEC_STAT 0x7D01 -#define HDMI_CEC_MASK 0x7D02 -#define HDMI_CEC_POLARITY 0x7D03 -#define HDMI_CEC_INT 0x7D04 -#define HDMI_CEC_ADDR_L 0x7D05 -#define HDMI_CEC_ADDR_H 0x7D06 -#define HDMI_CEC_TX_CNT 0x7D07 -#define HDMI_CEC_RX_CNT 0x7D08 -#define HDMI_CEC_TX_DATA0 0x7D10 -#define HDMI_CEC_TX_DATA1 0x7D11 -#define HDMI_CEC_TX_DATA2 0x7D12 -#define HDMI_CEC_TX_DATA3 0x7D13 -#define HDMI_CEC_TX_DATA4 0x7D14 -#define HDMI_CEC_TX_DATA5 0x7D15 -#define HDMI_CEC_TX_DATA6 0x7D16 -#define HDMI_CEC_TX_DATA7 0x7D17 -#define HDMI_CEC_TX_DATA8 0x7D18 -#define HDMI_CEC_TX_DATA9 0x7D19 -#define HDMI_CEC_TX_DATA10 0x7D1a -#define HDMI_CEC_TX_DATA11 0x7D1b -#define HDMI_CEC_TX_DATA12 0x7D1c -#define HDMI_CEC_TX_DATA13 0x7D1d -#define HDMI_CEC_TX_DATA14 0x7D1e -#define HDMI_CEC_TX_DATA15 0x7D1f -#define HDMI_CEC_RX_DATA0 0x7D20 -#define HDMI_CEC_RX_DATA1 0x7D21 -#define HDMI_CEC_RX_DATA2 0x7D22 -#define HDMI_CEC_RX_DATA3 0x7D23 -#define HDMI_CEC_RX_DATA4 0x7D24 -#define HDMI_CEC_RX_DATA5 0x7D25 -#define HDMI_CEC_RX_DATA6 0x7D26 -#define HDMI_CEC_RX_DATA7 0x7D27 -#define HDMI_CEC_RX_DATA8 0x7D28 -#define HDMI_CEC_RX_DATA9 0x7D29 -#define HDMI_CEC_RX_DATA10 0x7D2a -#define HDMI_CEC_RX_DATA11 0x7D2b -#define HDMI_CEC_RX_DATA12 0x7D2c -#define HDMI_CEC_RX_DATA13 0x7D2d -#define HDMI_CEC_RX_DATA14 0x7D2e -#define HDMI_CEC_RX_DATA15 0x7D2f -#define HDMI_CEC_LOCK 0x7D30 -#define HDMI_CEC_WKUPCTRL 0x7D31 - /* I2C Master Registers (E-DDC) */ #define HDMI_I2CM_SLAVE 0x7E00 #define HDMI_I2CM_ADDRESS 0x7E01 @@ -555,6 +510,7 @@ enum { /* CONFIG0_ID field values */ HDMI_CONFIG0_I2S = 0x10, + HDMI_CONFIG0_CEC = 0x02, /* CONFIG1_ID field values */ HDMI_CONFIG1_AHB = 0x01, diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c new file mode 100644 index 0000000000000000000000000000000000000000..63c7a01b7053eb80dfc852b83bfbd04b79d0a75a --- /dev/null +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -0,0 +1,981 @@ +/* + * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd + * Copyright (C) STMicroelectronics SA 2017 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Modified by Philippe Cornu + * This generic Synopsys DesignWare MIPI DSI host driver is based on the + * Rockchip version from rockchip/dw-mipi-dsi.c with phy & bridge APIs. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include