From 83f45fc360c8e16a330474860ebda872d1384c8c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 09:10:18 +0200 Subject: [PATCH 0001/1868] drm: Don't grab an fb reference for the idr The current refcounting scheme is that the fb lookup idr also holds a reference. This works out nicely because thus far we've always explicitly cleaned up idr entries for framebuffers: - Userspace fbs get removed in the rmfb ioctl or when the drm file gets closed. - Kernel fbs (for fbdev emulation) get cleaned up by the driver code at module unload time. But now i915 also reconstructs the bios fbs for a smooth transition. And that fb is purely transitional and should get removed immediately once all crtcs stop using it. Of course if the i915 fbdev code decides to reuse it as the main fbdev fb then it shouldn't be cleaned up, but in that case the fbdev code will grab its own reference. The problem is now that we also want to register that takeover fb in the idr, so that userspace can do a smooth transition (animated maybe even!) itself. But currently we have no one who will clean up the idr reference once that fb isn't useful any more, and so essentially leak it. Fix this by no longer holding a full fb reference for the idr, but instead just have a weak reference using kref_get_unless_zero. But that requires us to synchronize and clean up with the idr and fb_lock in drm_framebuffer_free, so add that. It's a bit ugly that we have to unconditionally grab the fb_lock, but without that someone might creep through a race. This leak was caught by the fb leak check in drm_mode_config_cleanup. 
Originally the leak was introduced in commit 46f297fb83d4f9a6f6891964beb184664341a28b Author: Jesse Barnes Date: Fri Mar 7 08:57:48 2014 -0800 drm/i915: add plane_config fetching infrastructure v2 Cc: Jesse Barnes Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77511 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 46 +++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index fa2be249999c..33ff631c8d23 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -515,9 +515,6 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, if (ret) goto out; - /* Grab the idr reference. */ - drm_framebuffer_reference(fb); - dev->mode_config.num_fb++; list_add(&fb->head, &dev->mode_config.fb_list); out: @@ -527,10 +524,34 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, } EXPORT_SYMBOL(drm_framebuffer_init); +/* dev->mode_config.fb_lock must be held! */ +static void __drm_framebuffer_unregister(struct drm_device *dev, + struct drm_framebuffer *fb) +{ + mutex_lock(&dev->mode_config.idr_mutex); + idr_remove(&dev->mode_config.crtc_idr, fb->base.id); + mutex_unlock(&dev->mode_config.idr_mutex); + + fb->base.id = 0; +} + static void drm_framebuffer_free(struct kref *kref) { struct drm_framebuffer *fb = container_of(kref, struct drm_framebuffer, refcount); + struct drm_device *dev = fb->dev; + + /* + * The lookup idr holds a weak reference, which has not necessarily been + * removed at this point. Check for that. + */ + mutex_lock(&dev->mode_config.fb_lock); + if (fb->base.id) { + /* Mark fb as reaped and drop idr ref. 
*/ + __drm_framebuffer_unregister(dev, fb); + } + mutex_unlock(&dev->mode_config.fb_lock); + fb->funcs->destroy(fb); } @@ -567,8 +588,10 @@ struct drm_framebuffer *drm_framebuffer_lookup(struct drm_device *dev, mutex_lock(&dev->mode_config.fb_lock); fb = __drm_framebuffer_lookup(dev, id); - if (fb) - drm_framebuffer_reference(fb); + if (fb) { + if (!kref_get_unless_zero(&fb->refcount)) + fb = NULL; + } mutex_unlock(&dev->mode_config.fb_lock); return fb; @@ -612,19 +635,6 @@ static void __drm_framebuffer_unreference(struct drm_framebuffer *fb) kref_put(&fb->refcount, drm_framebuffer_free_bug); } -/* dev->mode_config.fb_lock must be held! */ -static void __drm_framebuffer_unregister(struct drm_device *dev, - struct drm_framebuffer *fb) -{ - mutex_lock(&dev->mode_config.idr_mutex); - idr_remove(&dev->mode_config.crtc_idr, fb->base.id); - mutex_unlock(&dev->mode_config.idr_mutex); - - fb->base.id = 0; - - __drm_framebuffer_unreference(fb); -} - /** * drm_framebuffer_unregister_private - unregister a private fb from the lookup idr * @fb: fb to unregister -- GitLab From ea6763c104c93acb6554659fe4a3c9e9328a4b51 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 11:36:38 +0200 Subject: [PATCH 0002/1868] video/fbdev: Always built-in video= cmdline parsing In drm/i915 we want to get at the video= cmdline modes even when we don't have fbdev support enabled, so that users can always override the kernel's initial mode selection. But that gives us a direct dependency upon the parsing code in the fbdev subsystem. Since it's so little code just extract these 2 functions and always build them in. While at it fix the checkpatch fail in this code. v2: Also move fb_mode_option. Spotted by the kbuild. v3: Review from Geert: - Keep the old copyright notice from fb_mem.c, although I have no idea what exactly applies. - Only compile this when needed. 
Cc: Geert Uytterhoeven Cc: Plagniol-Villard Cc: Tomi Valkeinen Cc: linux-fbdev@vger.kernel.org Signed-off-by: Daniel Vetter -- I prefer if we can merge this through drm-next since we'll use it there in follow-up patches. -Daniel --- drivers/video/fbdev/Kconfig | 4 + drivers/video/fbdev/core/Makefile | 1 + drivers/video/fbdev/core/fb_cmdline.c | 110 ++++++++++++++++++++++++++ drivers/video/fbdev/core/fbmem.c | 92 --------------------- drivers/video/fbdev/core/modedb.c | 3 - 5 files changed, 115 insertions(+), 95 deletions(-) create mode 100644 drivers/video/fbdev/core/fb_cmdline.c diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 59c98bfd5a8a..f1458c95a688 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -4,6 +4,7 @@ menuconfig FB tristate "Support for frame buffer devices" + select FB_CMDLINE ---help--- The frame buffer device provides an abstraction for the graphics hardware. It represents the frame buffer of some video hardware and @@ -52,6 +53,9 @@ config FIRMWARE_EDID combination with certain motherboards and monitors are known to suffer from this problem. 
+config FB_CMDLINE + bool + config FB_DDC tristate depends on FB diff --git a/drivers/video/fbdev/core/Makefile b/drivers/video/fbdev/core/Makefile index fa306538dac2..67f28e20a892 100644 --- a/drivers/video/fbdev/core/Makefile +++ b/drivers/video/fbdev/core/Makefile @@ -1,4 +1,5 @@ obj-y += fb_notify.o +obj-$(CONFIG_FB_CMDLINE) += fb_cmdline.o obj-$(CONFIG_FB) += fb.o fb-y := fbmem.o fbmon.o fbcmap.o fbsysfs.o \ modedb.o fbcvt.o diff --git a/drivers/video/fbdev/core/fb_cmdline.c b/drivers/video/fbdev/core/fb_cmdline.c new file mode 100644 index 000000000000..39509ccd92f1 --- /dev/null +++ b/drivers/video/fbdev/core/fb_cmdline.c @@ -0,0 +1,110 @@ +/* + * linux/drivers/video/fb_cmdline.c + * + * Copyright (C) 2014 Intel Corp + * Copyright (C) 1994 Martin Schaller + * + * 2001 - Documented with DocBook + * - Brad Douglas + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. 
+ * + * Authors: + * Vetter + */ +#include +#include + +static char *video_options[FB_MAX] __read_mostly; +static int ofonly __read_mostly; + +const char *fb_mode_option; +EXPORT_SYMBOL_GPL(fb_mode_option); + +/** + * fb_get_options - get kernel boot parameters + * @name: framebuffer name as it would appear in + * the boot parameter line + * (video=:) + * @option: the option will be stored here + * + * NOTE: Needed to maintain backwards compatibility + */ +int fb_get_options(const char *name, char **option) +{ + char *opt, *options = NULL; + int retval = 0; + int name_len = strlen(name), i; + + if (name_len && ofonly && strncmp(name, "offb", 4)) + retval = 1; + + if (name_len && !retval) { + for (i = 0; i < FB_MAX; i++) { + if (video_options[i] == NULL) + continue; + if (!video_options[i][0]) + continue; + opt = video_options[i]; + if (!strncmp(name, opt, name_len) && + opt[name_len] == ':') + options = opt + name_len + 1; + } + } + /* No match, pass global option */ + if (!options && option && fb_mode_option) + options = kstrdup(fb_mode_option, GFP_KERNEL); + if (options && !strncmp(options, "off", 3)) + retval = 1; + + if (option) + *option = options; + + return retval; +} +EXPORT_SYMBOL(fb_get_options); + +/** + * video_setup - process command line options + * @options: string of options + * + * Process command line options for frame buffer subsystem. + * + * NOTE: This function is a __setup and __init function. + * It only stores the options. Drivers have to call + * fb_get_options() as necessary. + * + * Always returns 1. 
+ * + */ +static int __init video_setup(char *options) +{ + int i, global = 0; + + if (!options || !*options) + global = 1; + + if (!global && !strncmp(options, "ofonly", 6)) { + ofonly = 1; + global = 1; + } + + if (!global && !strchr(options, ':')) { + fb_mode_option = options; + global = 1; + } + + if (!global) { + for (i = 0; i < FB_MAX; i++) { + if (video_options[i] == NULL) { + video_options[i] = options; + break; + } + } + } + + return 1; +} +__setup("video=", video_setup); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index b5e85f6c1c26..0705d8883ede 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1908,96 +1908,4 @@ int fb_new_modelist(struct fb_info *info) return err; } -static char *video_options[FB_MAX] __read_mostly; -static int ofonly __read_mostly; - -/** - * fb_get_options - get kernel boot parameters - * @name: framebuffer name as it would appear in - * the boot parameter line - * (video=:) - * @option: the option will be stored here - * - * NOTE: Needed to maintain backwards compatibility - */ -int fb_get_options(const char *name, char **option) -{ - char *opt, *options = NULL; - int retval = 0; - int name_len = strlen(name), i; - - if (name_len && ofonly && strncmp(name, "offb", 4)) - retval = 1; - - if (name_len && !retval) { - for (i = 0; i < FB_MAX; i++) { - if (video_options[i] == NULL) - continue; - if (!video_options[i][0]) - continue; - opt = video_options[i]; - if (!strncmp(name, opt, name_len) && - opt[name_len] == ':') - options = opt + name_len + 1; - } - } - /* No match, pass global option */ - if (!options && option && fb_mode_option) - options = kstrdup(fb_mode_option, GFP_KERNEL); - if (options && !strncmp(options, "off", 3)) - retval = 1; - - if (option) - *option = options; - - return retval; -} -EXPORT_SYMBOL(fb_get_options); - -#ifndef MODULE -/** - * video_setup - process command line options - * @options: string of options - * - * Process command line 
options for frame buffer subsystem. - * - * NOTE: This function is a __setup and __init function. - * It only stores the options. Drivers have to call - * fb_get_options() as necessary. - * - * Returns zero. - * - */ -static int __init video_setup(char *options) -{ - int i, global = 0; - - if (!options || !*options) - global = 1; - - if (!global && !strncmp(options, "ofonly", 6)) { - ofonly = 1; - global = 1; - } - - if (!global && !strchr(options, ':')) { - fb_mode_option = options; - global = 1; - } - - if (!global) { - for (i = 0; i < FB_MAX; i++) { - if (video_options[i] == NULL) { - video_options[i] = options; - break; - } - - } - } - - return 1; -} -__setup("video=", video_setup); -#endif - MODULE_LICENSE("GPL"); diff --git a/drivers/video/fbdev/core/modedb.c b/drivers/video/fbdev/core/modedb.c index a9a907c440d7..388f7971494b 100644 --- a/drivers/video/fbdev/core/modedb.c +++ b/drivers/video/fbdev/core/modedb.c @@ -29,9 +29,6 @@ #define DPRINTK(fmt, args...) #endif -const char *fb_mode_option; -EXPORT_SYMBOL_GPL(fb_mode_option); - /* * Standard video mode definitions (taken from XFree86) */ -- GitLab From eaf99c749d43ae74ac7ffece5512f3c73f01dfd2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 6 Aug 2014 10:08:32 +0200 Subject: [PATCH 0003/1868] drm: Perform cmdline mode parsing during connector initialisation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915.ko has a custom fbdev initialisation routine that aims to preserve the current mode set by the BIOS, unless overruled by the user. The user's wishes are determined by what, if any, mode is specified on the command line (via the video= parameter). However, that command line mode is first parsed by drm_fb_helper_initial_config() which is called after i915.ko's custom initial_config() as a fallback method. So in order for us to honour it, we need to move the cmdline parser earlier. 
If we perform the connector cmdline parsing as soon as we initialise the connector, that cmdline mode and forced status is then available even if the fbdev helper is not compiled in or never called. We also then expose the cmdline user mode in the connector mode lists. v2: Rebase after connector->name upheaval. v3: Adapt mga200 to look for the cmdline mode in the new place. Nicely simplifies things while at that. v4: Fix checkpatch. v5: Select FB_CMDLINE to adapt to the changed fbdev patch. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73154 Signed-off-by: Chris Wilson (v2) Cc: Jesse Barnes Cc: Ville Syrjälä Cc: Daniel Vetter Reviewed-by: Jesse Barnes (v2) Cc: dri-devel@lists.freedesktop.org Cc: Julia Lemire Cc: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/Kconfig | 1 + drivers/gpu/drm/drm_crtc.c | 55 ++++++++++++++++++++++ drivers/gpu/drm/drm_fb_helper.c | 64 ++------------------------ drivers/gpu/drm/drm_modes.c | 1 + drivers/gpu/drm/drm_probe_helper.c | 17 +++++++ drivers/gpu/drm/mgag200/mgag200_mode.c | 21 ++------- include/drm/drm_crtc.h | 1 + include/drm/drm_fb_helper.h | 1 - 8 files changed, 83 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 31894c8c1773..367f5dd23291 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -8,6 +8,7 @@ menuconfig DRM tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)" depends on (AGP || AGP=n) && !EMULATED_CMPXCHG && MMU && HAS_DMA select HDMI + select FB_CMDLINE select I2C select I2C_ALGOBIT select DMA_SHARED_BUFFER diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 33ff631c8d23..66d3bfb8d264 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -862,6 +862,59 @@ static void drm_mode_remove(struct drm_connector *connector, drm_mode_destroy(connector->dev, mode); } +/** + * drm_connector_get_cmdline_mode - reads the user's cmdline mode + * @connector: connector to 
query + * @mode: returned mode + * + * The kernel supports per-connector configuration of its consoles through + * use of the video= parameter. This function parses that option and + * extracts the user's specified mode (or enable/disable status) for a + * particular connector. This is typically only used during the early fbdev + * setup. + */ +static void drm_connector_get_cmdline_mode(struct drm_connector *connector) +{ + struct drm_cmdline_mode *mode = &connector->cmdline_mode; + char *option = NULL; + + if (fb_get_options(connector->name, &option)) + return; + + if (!drm_mode_parse_command_line_for_connector(option, + connector, + mode)) + return; + + if (mode->force) { + const char *s; + + switch (mode->force) { + case DRM_FORCE_OFF: + s = "OFF"; + break; + case DRM_FORCE_ON_DIGITAL: + s = "ON - dig"; + break; + default: + case DRM_FORCE_ON: + s = "ON"; + break; + } + + DRM_INFO("forcing %s connector %s\n", connector->name, s); + connector->force = mode->force; + } + + DRM_DEBUG_KMS("cmdline mode for connector %s %dx%d@%dHz%s%s%s\n", + connector->name, + mode->xres, mode->yres, + mode->refresh_specified ? mode->refresh : 60, + mode->rb ? " reduced blanking" : "", + mode->margins ? " with margins" : "", + mode->interlace ? 
" interlaced" : ""); +} + /** * drm_connector_init - Init a preallocated connector * @dev: DRM device @@ -914,6 +967,8 @@ int drm_connector_init(struct drm_device *dev, connector->edid_blob_ptr = NULL; connector->status = connector_status_unknown; + drm_connector_get_cmdline_mode(connector); + list_add_tail(&connector->head, &dev->mode_config.connector_list); dev->mode_config.num_connector++; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 3144db9dc0f1..3a6b6635e3f5 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -171,60 +171,6 @@ int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper, } EXPORT_SYMBOL(drm_fb_helper_remove_one_connector); -static int drm_fb_helper_parse_command_line(struct drm_fb_helper *fb_helper) -{ - struct drm_fb_helper_connector *fb_helper_conn; - int i; - - for (i = 0; i < fb_helper->connector_count; i++) { - struct drm_cmdline_mode *mode; - struct drm_connector *connector; - char *option = NULL; - - fb_helper_conn = fb_helper->connector_info[i]; - connector = fb_helper_conn->connector; - mode = &fb_helper_conn->cmdline_mode; - - /* do something on return - turn off connector maybe */ - if (fb_get_options(connector->name, &option)) - continue; - - if (drm_mode_parse_command_line_for_connector(option, - connector, - mode)) { - if (mode->force) { - const char *s; - switch (mode->force) { - case DRM_FORCE_OFF: - s = "OFF"; - break; - case DRM_FORCE_ON_DIGITAL: - s = "ON - dig"; - break; - default: - case DRM_FORCE_ON: - s = "ON"; - break; - } - - DRM_INFO("forcing %s connector %s\n", - connector->name, s); - connector->force = mode->force; - } - - DRM_DEBUG_KMS("cmdline mode for connector %s %dx%d@%dHz%s%s%s\n", - connector->name, - mode->xres, mode->yres, - mode->refresh_specified ? mode->refresh : 60, - mode->rb ? " reduced blanking" : "", - mode->margins ? " with margins" : "", - mode->interlace ? 
" interlaced" : ""); - } - - } - return 0; -} - static void drm_fb_helper_save_lut_atomic(struct drm_crtc *crtc, struct drm_fb_helper *helper) { uint16_t *r_base, *g_base, *b_base; @@ -1013,7 +959,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_connector *fb_helper_conn = fb_helper->connector_info[i]; struct drm_cmdline_mode *cmdline_mode; - cmdline_mode = &fb_helper_conn->cmdline_mode; + cmdline_mode = &fb_helper_conn->connector->cmdline_mode; if (cmdline_mode->bpp_specified) { switch (cmdline_mode->bpp) { @@ -1260,9 +1206,7 @@ EXPORT_SYMBOL(drm_has_preferred_mode); static bool drm_has_cmdline_mode(struct drm_fb_helper_connector *fb_connector) { - struct drm_cmdline_mode *cmdline_mode; - cmdline_mode = &fb_connector->cmdline_mode; - return cmdline_mode->specified; + return fb_connector->connector->cmdline_mode.specified; } struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *fb_helper_conn, @@ -1272,7 +1216,7 @@ struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *f struct drm_display_mode *mode = NULL; bool prefer_non_interlace; - cmdline_mode = &fb_helper_conn->cmdline_mode; + cmdline_mode = &fb_helper_conn->connector->cmdline_mode; if (cmdline_mode->specified == false) return mode; @@ -1657,8 +1601,6 @@ bool drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel) struct drm_device *dev = fb_helper->dev; int count = 0; - drm_fb_helper_parse_command_line(fb_helper); - mutex_lock(&dev->mode_config.mutex); count = drm_fb_helper_probe_connector_modes(fb_helper, dev->mode_config.max_width, diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index bedf1894e17e..d1b7d2006529 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1259,6 +1259,7 @@ drm_mode_create_from_cmdline_mode(struct drm_device *dev, if (!mode) return NULL; + mode->type |= DRM_MODE_TYPE_USERDEF; drm_mode_set_crtcinfo(mode, 
CRTC_INTERLACE_HALVE_V); return mode; } diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index db7d250f7ac7..6857e9ad6339 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -82,6 +82,22 @@ static void drm_mode_validate_flag(struct drm_connector *connector, return; } +static int drm_helper_probe_add_cmdline_mode(struct drm_connector *connector) +{ + struct drm_display_mode *mode; + + if (!connector->cmdline_mode.specified) + return 0; + + mode = drm_mode_create_from_cmdline_mode(connector->dev, + &connector->cmdline_mode); + if (mode == NULL) + return 0; + + drm_mode_probed_add(connector, mode); + return 1; +} + static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connector *connector, uint32_t maxX, uint32_t maxY, bool merge_type_bits) { @@ -141,6 +157,7 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect if (count == 0 && connector->status == connector_status_connected) count = drm_add_modes_noedid(connector, 1024, 768); + count += drm_helper_probe_add_cmdline_mode(connector); if (count == 0) goto prune; diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index 45f04dea0ac2..83485ab81ce8 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -1483,11 +1483,7 @@ static int mga_vga_mode_valid(struct drm_connector *connector, { struct drm_device *dev = connector->dev; struct mga_device *mdev = (struct mga_device*)dev->dev_private; - struct mga_fbdev *mfbdev = mdev->mfbdev; - struct drm_fb_helper *fb_helper = &mfbdev->helper; - struct drm_fb_helper_connector *fb_helper_conn = NULL; int bpp = 32; - int i = 0; if (IS_G200_SE(mdev)) { if (mdev->unique_rev_id == 0x01) { @@ -1537,21 +1533,14 @@ static int mga_vga_mode_valid(struct drm_connector *connector, } /* Validate the mode input by the user */ - for (i = 0; i < fb_helper->connector_count; i++) { - if 
(fb_helper->connector_info[i]->connector == connector) { - /* Found the helper for this connector */ - fb_helper_conn = fb_helper->connector_info[i]; - if (fb_helper_conn->cmdline_mode.specified) { - if (fb_helper_conn->cmdline_mode.bpp_specified) { - bpp = fb_helper_conn->cmdline_mode.bpp; - } - } - } + if (connector->cmdline_mode.specified) { + if (connector->cmdline_mode.bpp_specified) + bpp = connector->cmdline_mode.bpp; } if ((mode->hdisplay * mode->vdisplay * (bpp/8)) > mdev->mc.vram_size) { - if (fb_helper_conn) - fb_helper_conn->cmdline_mode.specified = false; + if (connector->cmdline_mode.specified) + connector->cmdline_mode.specified = false; return MODE_BAD; } diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index f1105d0da059..c530b4920a09 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -548,6 +548,7 @@ struct drm_connector { void *helper_private; /* forced on connector */ + struct drm_cmdline_mode cmdline_mode; enum drm_connector_force force; bool override_edid; uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER]; diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index bfd329d613c4..f4ad254e3488 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -77,7 +77,6 @@ struct drm_fb_helper_funcs { struct drm_fb_helper_connector { struct drm_connector *connector; - struct drm_cmdline_mode cmdline_mode; }; struct drm_fb_helper { -- GitLab From ddde43711fdde505ac413102faa2352704cd858a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:02:50 +0300 Subject: [PATCH 0004/1868] drm: Warn when leaking flip events on close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Warn when there are events on the file_priv->event_list just before file_priv gets freed. This can occur if the driver doesn't clean up pending page flip events in ->preclose(). 
Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/drm_fops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c index 8f91062db5b6..0fa4dadac4c6 100644 --- a/drivers/gpu/drm/drm_fops.c +++ b/drivers/gpu/drm/drm_fops.c @@ -529,6 +529,8 @@ int drm_release(struct inode *inode, struct file *filp) if (drm_core_check_feature(dev, DRIVER_PRIME)) drm_prime_destroy_file_private(&file_priv->prime); + WARN_ON(!list_empty(&file_priv->event_list)); + put_pid(file_priv->pid); kfree(file_priv); -- GitLab From e6ae8687a87b1fe5c25e824c8ad300f5587eb622 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 6 Aug 2014 13:16:59 -0400 Subject: [PATCH 0005/1868] drm: idiot-proof vblank After spending slightly more time than I'd care to admit debugging the various and presumably spectacular way things fail when you pass too low a value to drm_vblank_init() (thanks console-lock for not letting me see the carnage!), I decided it might be a good idea to add some sanity checking. Signed-off-by: Rob Clark Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 0de123afdb34..6f16a104d6d0 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -730,6 +730,8 @@ EXPORT_SYMBOL(drm_get_last_vbltimestamp); */ u32 drm_vblank_count(struct drm_device *dev, int crtc) { + if (WARN_ON(crtc >= dev->num_crtcs)) + return 0; return atomic_read(&dev->vblank[crtc].count); } EXPORT_SYMBOL(drm_vblank_count); @@ -752,6 +754,9 @@ u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc, { u32 cur_vblank; + if (WARN_ON(crtc >= dev->num_crtcs)) + return 0; + /* Read timestamp from slot of _vblank_time ringbuffer * that corresponds to current vblank count. Retry if * count has incremented during readout. 
This works like @@ -927,6 +932,9 @@ int drm_vblank_get(struct drm_device *dev, int crtc) unsigned long irqflags; int ret = 0; + if (WARN_ON(crtc >= dev->num_crtcs)) + return -EINVAL; + spin_lock_irqsave(&dev->vbl_lock, irqflags); /* Going from 0->1 means we have to enable interrupts again */ if (atomic_add_return(1, &dev->vblank[crtc].refcount) == 1) { @@ -975,6 +983,9 @@ void drm_vblank_put(struct drm_device *dev, int crtc) { BUG_ON(atomic_read(&dev->vblank[crtc].refcount) == 0); + if (WARN_ON(crtc >= dev->num_crtcs)) + return; + /* Last user schedules interrupt disable */ if (atomic_dec_and_test(&dev->vblank[crtc].refcount) && (drm_vblank_offdelay > 0)) @@ -1019,6 +1030,9 @@ void drm_vblank_off(struct drm_device *dev, int crtc) unsigned long irqflags; unsigned int seq; + if (WARN_ON(crtc >= dev->num_crtcs)) + return; + spin_lock_irqsave(&dev->vbl_lock, irqflags); vblank_disable_and_save(dev, crtc); wake_up(&dev->vblank[crtc].queue); @@ -1078,6 +1092,9 @@ void drm_vblank_on(struct drm_device *dev, int crtc) { unsigned long irqflags; + if (WARN_ON(crtc >= dev->num_crtcs)) + return; + spin_lock_irqsave(&dev->vbl_lock, irqflags); /* re-enable interrupts if there's are users left */ if (atomic_read(&dev->vblank[crtc].refcount) != 0) @@ -1131,6 +1148,10 @@ void drm_vblank_pre_modeset(struct drm_device *dev, int crtc) /* vblank is not initialized (IRQ not installed ?), or has been freed */ if (!dev->num_crtcs) return; + + if (WARN_ON(crtc >= dev->num_crtcs)) + return; + /* * To avoid all the problems that might happen if interrupts * were enabled/disabled around or between these calls, we just @@ -1439,6 +1460,9 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) if (!dev->num_crtcs) return false; + if (WARN_ON(crtc >= dev->num_crtcs)) + return false; + /* Need timestamp lock to prevent concurrent execution with * vblank enable/disable, as this would cause inconsistent * or corrupted timestamps and vblank counts. 
-- GitLab From 7ffd7a68511c710b84db3548a1997fd2625f580a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:44 +0300 Subject: [PATCH 0006/1868] drm: Always reject drm_vblank_get() after drm_vblank_off() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure drm_vblank_get() never succeeds when called between drm_vblank_off() and drm_vblank_on(). Borrow a trick from the old drm_vblank_{pre,post}_modeset() functions and just bump the refcount in drm_vblank_off() and drop it in drm_vblank_on(). When drm_vblank_get() encounters a >0 refcount and the vblank interrupt is already disabled it will simply return -EINVAL. Hopefully the use of inmodeset won't conflict badly with drm_vblank_{pre,post}_modeset(). For i915 there's a window between drm_vblank_off() and marking the crtc as inactive where the current code still allows drm_vblank_get(). v2: Describe what drm_vblank_get() does to explain how a simple refcount bump manages to fix things (Daniel) Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 0de123afdb34..b16a63622bad 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1039,6 +1039,15 @@ void drm_vblank_off(struct drm_device *dev, int crtc) } spin_unlock(&dev->event_lock); + /* + * Prevent subsequent drm_vblank_get() from re-enabling + * the vblank interrupt by bumping the refcount. 
+ */ + if (!dev->vblank[crtc].inmodeset) { + atomic_inc(&dev->vblank[crtc].refcount); + dev->vblank[crtc].inmodeset = 1; + } + spin_unlock_irqrestore(&dev->vbl_lock, irqflags); } EXPORT_SYMBOL(drm_vblank_off); @@ -1079,6 +1088,11 @@ void drm_vblank_on(struct drm_device *dev, int crtc) unsigned long irqflags; spin_lock_irqsave(&dev->vbl_lock, irqflags); + /* Drop our private "prevent drm_vblank_get" refcount */ + if (dev->vblank[crtc].inmodeset) { + atomic_dec(&dev->vblank[crtc].refcount); + dev->vblank[crtc].inmodeset = 0; + } /* re-enable interrupts if there's are users left */ if (atomic_read(&dev->vblank[crtc].refcount) != 0) WARN_ON(drm_vblank_enable(dev, crtc)); -- GitLab From 08c71e5e817a956389af5da5e99ab3e26d5c673d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:45 +0300 Subject: [PATCH 0007/1868] drm/i915: Warn if drm_vblank_get() still works after drm_vblank_off() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Drop the drm_vblank_off() (Daniel) Use drm_crtc_vblank_{get,put}() Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 99eb7cad62a8..8f6b932d8e79 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1341,6 +1341,12 @@ static void assert_sprites_disabled(struct drm_i915_private *dev_priv, } } +static void assert_vblank_disabled(struct drm_crtc *crtc) +{ + if (WARN_ON(drm_crtc_vblank_get(crtc) == 0)) + drm_crtc_vblank_put(crtc); +} + static void ibx_assert_pch_refclk_enabled(struct drm_i915_private *dev_priv) { u32 val; @@ -3905,6 +3911,8 @@ static void intel_crtc_enable_planes(struct drm_crtc *crtc) int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; + 
assert_vblank_disabled(crtc); + drm_vblank_on(dev, pipe); intel_enable_primary_hw_plane(dev_priv, plane, pipe); @@ -3954,6 +3962,8 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc) intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe)); drm_vblank_off(dev, pipe); + + assert_vblank_disabled(crtc); } static void ironlake_crtc_enable(struct drm_crtc *crtc) -- GitLab From 844b03f27739135fe1fed2fef06da0ffc4c7a081 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:46 +0300 Subject: [PATCH 0008/1868] drm: Don't clear vblank timestamps when vblank interrupt is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clearing the timestamps causes us to send zeroed timestamps to userspace if they get sent out in response to the drm_vblank_off(). It's better to send the very latest timestamp and count instead. Testcase: igt/kms_flip/modeset-vs-vblank-race Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index b16a63622bad..65d2da9b604b 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -55,14 +55,6 @@ */ #define DRM_REDUNDANT_VBLIRQ_THRESH_NS 1000000 -/* - * Clear vblank timestamp buffer for a crtc. - */ -static void clear_vblank_timestamps(struct drm_device *dev, int crtc) -{ - memset(dev->vblank[crtc].time, 0, sizeof(dev->vblank[crtc].time)); -} - /* * Disable vblank irq's on crtc, make sure that last vblank count * of hardware and corresponding consistent software vblank counter @@ -131,9 +123,6 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) smp_mb__after_atomic(); } - /* Invalidate all timestamps while vblank irq's are off. 
*/ - clear_vblank_timestamps(dev, crtc); - spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); } -- GitLab From 13b030af54a5e307cbcccdf5479873fbc4b7f185 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:47 +0300 Subject: [PATCH 0009/1868] drm: Move drm_update_vblank_count() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move drm_update_vblank_count() to avoid a forward declaration. No functional change. Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 128 +++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 65d2da9b604b..af965174b083 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -55,6 +55,70 @@ */ #define DRM_REDUNDANT_VBLIRQ_THRESH_NS 1000000 +/** + * drm_update_vblank_count - update the master vblank counter + * @dev: DRM device + * @crtc: counter to update + * + * Call back into the driver to update the appropriate vblank counter + * (specified by @crtc). Deal with wraparound, if it occurred, and + * update the last read value so we can deal with wraparound on the next + * call if necessary. + * + * Only necessary when going from off->on, to account for frames we + * didn't get an interrupt for. + * + * Note: caller must hold dev->vbl_lock since this reads & writes + * device vblank fields. + */ +static void drm_update_vblank_count(struct drm_device *dev, int crtc) +{ + u32 cur_vblank, diff, tslot, rc; + struct timeval t_vblank; + + /* + * Interrupts were disabled prior to this call, so deal with counter + * wrap if needed. + * NOTE! It's possible we lost a full dev->max_vblank_count events + * here if the register is small or we had vblank interrupts off for + * a long time. 
+ * + * We repeat the hardware vblank counter & timestamp query until + * we get consistent results. This to prevent races between gpu + * updating its hardware counter while we are retrieving the + * corresponding vblank timestamp. + */ + do { + cur_vblank = dev->driver->get_vblank_counter(dev, crtc); + rc = drm_get_last_vbltimestamp(dev, crtc, &t_vblank, 0); + } while (cur_vblank != dev->driver->get_vblank_counter(dev, crtc)); + + /* Deal with counter wrap */ + diff = cur_vblank - dev->vblank[crtc].last; + if (cur_vblank < dev->vblank[crtc].last) { + diff += dev->max_vblank_count; + + DRM_DEBUG("last_vblank[%d]=0x%x, cur_vblank=0x%x => diff=0x%x\n", + crtc, dev->vblank[crtc].last, cur_vblank, diff); + } + + DRM_DEBUG("enabling vblank interrupts on crtc %d, missed %d\n", + crtc, diff); + + /* Reinitialize corresponding vblank timestamp if high-precision query + * available. Skip this step if query unsupported or failed. Will + * reinitialize delayed at next vblank interrupt in that case. + */ + if (rc) { + tslot = atomic_read(&dev->vblank[crtc].count) + diff; + vblanktimestamp(dev, crtc, tslot) = t_vblank; + } + + smp_mb__before_atomic(); + atomic_add(diff, &dev->vblank[crtc].count); + smp_mb__after_atomic(); +} + /* * Disable vblank irq's on crtc, make sure that last vblank count * of hardware and corresponding consistent software vblank counter @@ -798,70 +862,6 @@ void drm_send_vblank_event(struct drm_device *dev, int crtc, } EXPORT_SYMBOL(drm_send_vblank_event); -/** - * drm_update_vblank_count - update the master vblank counter - * @dev: DRM device - * @crtc: counter to update - * - * Call back into the driver to update the appropriate vblank counter - * (specified by @crtc). Deal with wraparound, if it occurred, and - * update the last read value so we can deal with wraparound on the next - * call if necessary. - * - * Only necessary when going from off->on, to account for frames we - * didn't get an interrupt for. 
- * - * Note: caller must hold dev->vbl_lock since this reads & writes - * device vblank fields. - */ -static void drm_update_vblank_count(struct drm_device *dev, int crtc) -{ - u32 cur_vblank, diff, tslot, rc; - struct timeval t_vblank; - - /* - * Interrupts were disabled prior to this call, so deal with counter - * wrap if needed. - * NOTE! It's possible we lost a full dev->max_vblank_count events - * here if the register is small or we had vblank interrupts off for - * a long time. - * - * We repeat the hardware vblank counter & timestamp query until - * we get consistent results. This to prevent races between gpu - * updating its hardware counter while we are retrieving the - * corresponding vblank timestamp. - */ - do { - cur_vblank = dev->driver->get_vblank_counter(dev, crtc); - rc = drm_get_last_vbltimestamp(dev, crtc, &t_vblank, 0); - } while (cur_vblank != dev->driver->get_vblank_counter(dev, crtc)); - - /* Deal with counter wrap */ - diff = cur_vblank - dev->vblank[crtc].last; - if (cur_vblank < dev->vblank[crtc].last) { - diff += dev->max_vblank_count; - - DRM_DEBUG("last_vblank[%d]=0x%x, cur_vblank=0x%x => diff=0x%x\n", - crtc, dev->vblank[crtc].last, cur_vblank, diff); - } - - DRM_DEBUG("enabling vblank interrupts on crtc %d, missed %d\n", - crtc, diff); - - /* Reinitialize corresponding vblank timestamp if high-precision query - * available. Skip this step if query unsupported or failed. Will - * reinitialize delayed at next vblank interrupt in that case. 
- */ - if (rc) { - tslot = atomic_read(&dev->vblank[crtc].count) + diff; - vblanktimestamp(dev, crtc, tslot) = t_vblank; - } - - smp_mb__before_atomic(); - atomic_add(diff, &dev->vblank[crtc].count); - smp_mb__after_atomic(); -} - /** * drm_vblank_enable - enable the vblank interrupt on a CRTC * @dev: DRM device -- GitLab From 812e7465a7decf3cca0b5f71977a25eecd9626a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:48 +0300 Subject: [PATCH 0010/1868] drm: Have the vblank counter account for the time between vblank irq disable and drm_vblank_off() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the vblank irq has already been disabled (via the disable timer) when we call drm_vblank_off() sample the counter and timestamp one last time. This will make sure that the user space visible counter will account for time between vblank irq disable and drm_vblank_off(). Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index af965174b083..1f86f6c6ecc6 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -140,6 +140,19 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) */ spin_lock_irqsave(&dev->vblank_time_lock, irqflags); + /* + * If the vblank interrupt was already disbled update the count + * and timestamp to maintain the appearance that the counter + * has been ticking all along until this time. This makes the + * count account for the entire time between drm_vblank_on() and + * drm_vblank_off(). 
+ */ + if (!dev->vblank[crtc].enabled) { + drm_update_vblank_count(dev, crtc); + spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); + return; + } + dev->driver->disable_vblank(dev, crtc); dev->vblank[crtc].enabled = false; -- GitLab From f8ad028cc033f75fc479ca1c30e2ea4ba56e5269 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:49 +0300 Subject: [PATCH 0011/1868] drm: Avoid random vblank counter jumps if the hardware counter has been reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When drm_vblank_on() is called the hardware vblank counter may have been reset, so we can't trust that the old values sampled prior to drm_vblank_off() have anything to do with the new values. So update the .last count in drm_vblank_on() to make the first drm_vblank_enable() consider that as the reference point. This will correct the user space visible counter to account for the time between drm_vblank_on() and the first drm_vblank_enable() calls. For extra safety subtract one from the .last count in drm_vblank_on() to make sure that user space will never see the same counter value before and after modeset. 
Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 1f86f6c6ecc6..fc1525a499d9 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1095,6 +1095,18 @@ void drm_vblank_on(struct drm_device *dev, int crtc) atomic_dec(&dev->vblank[crtc].refcount); dev->vblank[crtc].inmodeset = 0; } + + /* + * sample the current counter to avoid random jumps + * when drm_vblank_enable() applies the diff + * + * -1 to make sure user will never see the same + * vblank counter value before and after a modeset + */ + dev->vblank[crtc].last = + (dev->driver->get_vblank_counter(dev, crtc) - 1) & + dev->max_vblank_count; + /* re-enable interrupts if there's are users left */ if (atomic_read(&dev->vblank[crtc].refcount) != 0) WARN_ON(drm_vblank_enable(dev, crtc)); -- GitLab From 8a51d5bef07f1c8c59de20089fb27ea39d395f1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:50 +0300 Subject: [PATCH 0012/1868] drm: Reduce the amount of dev->vblank[crtc] in the code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Declare a local struct drm_vblank_crtc * and use that instead of having to dig it out via 'dev->vblank[crtc]' everywhere. Performed with the following coccinelle incantation, and a few manual whitespace cleanups: @@ identifier func,member; expression num_crtcs; struct drm_device *dev; unsigned int crtc; @@ func (...) { + struct drm_vblank_crtc *vblank; ... if (crtc >= num_crtcs) return ...; + vblank = &dev->vblank[crtc]; <+... 
( - dev->vblank[crtc].member + vblank->member | - &(dev->vblank[crtc]) + vblank ) ...+> } @@ struct drm_device *dev; int crtc; identifier member; expression num_crtcs; @@ for (crtc = 0; crtc < num_crtcs; crtc++) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; + <+... ( - dev->vblank[crtc].member + vblank->member | - &(dev->vblank[crtc]) + vblank ) ...+> } @@ identifier func,member; @@ func (struct drm_device *dev, int crtc, ...) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; <+... ( - dev->vblank[crtc].member + vblank->member | - &(dev->vblank[crtc]) + vblank ) ...+> } v2: Rebased Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 134 ++++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index fc1525a499d9..b4460bf0e0e4 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -73,6 +73,7 @@ */ static void drm_update_vblank_count(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; u32 cur_vblank, diff, tslot, rc; struct timeval t_vblank; @@ -94,12 +95,12 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc) } while (cur_vblank != dev->driver->get_vblank_counter(dev, crtc)); /* Deal with counter wrap */ - diff = cur_vblank - dev->vblank[crtc].last; - if (cur_vblank < dev->vblank[crtc].last) { + diff = cur_vblank - vblank->last; + if (cur_vblank < vblank->last) { diff += dev->max_vblank_count; DRM_DEBUG("last_vblank[%d]=0x%x, cur_vblank=0x%x => diff=0x%x\n", - crtc, dev->vblank[crtc].last, cur_vblank, diff); + crtc, vblank->last, cur_vblank, diff); } DRM_DEBUG("enabling vblank interrupts on crtc %d, missed %d\n", @@ -110,12 +111,12 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc) * reinitialize delayed at next vblank interrupt in that case. 
*/ if (rc) { - tslot = atomic_read(&dev->vblank[crtc].count) + diff; + tslot = atomic_read(&vblank->count) + diff; vblanktimestamp(dev, crtc, tslot) = t_vblank; } smp_mb__before_atomic(); - atomic_add(diff, &dev->vblank[crtc].count); + atomic_add(diff, &vblank->count); smp_mb__after_atomic(); } @@ -127,6 +128,7 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc) */ static void vblank_disable_and_save(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; unsigned long irqflags; u32 vblcount; s64 diff_ns; @@ -147,14 +149,14 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) * count account for the entire time between drm_vblank_on() and * drm_vblank_off(). */ - if (!dev->vblank[crtc].enabled) { + if (!vblank->enabled) { drm_update_vblank_count(dev, crtc); spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); return; } dev->driver->disable_vblank(dev, crtc); - dev->vblank[crtc].enabled = false; + vblank->enabled = false; /* No further vblank irq's will be processed after * this point. Get current hardware vblank count and @@ -169,9 +171,9 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) * delayed gpu counter increment. */ do { - dev->vblank[crtc].last = dev->driver->get_vblank_counter(dev, crtc); + vblank->last = dev->driver->get_vblank_counter(dev, crtc); vblrc = drm_get_last_vbltimestamp(dev, crtc, &tvblank, 0); - } while (dev->vblank[crtc].last != dev->driver->get_vblank_counter(dev, crtc) && (--count) && vblrc); + } while (vblank->last != dev->driver->get_vblank_counter(dev, crtc) && (--count) && vblrc); if (!count) vblrc = 0; @@ -179,7 +181,7 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) /* Compute time difference to stored timestamp of last vblank * as updated by last invocation of drm_handle_vblank() in vblank irq. 
*/ - vblcount = atomic_read(&dev->vblank[crtc].count); + vblcount = atomic_read(&vblank->count); diff_ns = timeval_to_ns(&tvblank) - timeval_to_ns(&vblanktimestamp(dev, crtc, vblcount)); @@ -196,7 +198,7 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) * hope for the best. */ if ((vblrc > 0) && (abs64(diff_ns) > 1000000)) { - atomic_inc(&dev->vblank[crtc].count); + atomic_inc(&vblank->count); smp_mb__after_atomic(); } @@ -236,8 +238,10 @@ void drm_vblank_cleanup(struct drm_device *dev) return; for (crtc = 0; crtc < dev->num_crtcs; crtc++) { - del_timer_sync(&dev->vblank[crtc].disable_timer); - vblank_disable_fn((unsigned long)&dev->vblank[crtc]); + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; + + del_timer_sync(&vblank->disable_timer); + vblank_disable_fn((unsigned long)vblank); } kfree(dev->vblank); @@ -270,11 +274,13 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs) goto err; for (i = 0; i < num_crtcs; i++) { - dev->vblank[i].dev = dev; - dev->vblank[i].crtc = i; - init_waitqueue_head(&dev->vblank[i].queue); - setup_timer(&dev->vblank[i].disable_timer, vblank_disable_fn, - (unsigned long)&dev->vblank[i]); + struct drm_vblank_crtc *vblank = &dev->vblank[i]; + + vblank->dev = dev; + vblank->crtc = i; + init_waitqueue_head(&vblank->queue); + setup_timer(&vblank->disable_timer, vblank_disable_fn, + (unsigned long)vblank); } DRM_INFO("Supports vblank timestamp caching Rev 2 (21.10.2013).\n"); @@ -426,9 +432,11 @@ int drm_irq_uninstall(struct drm_device *dev) if (dev->num_crtcs) { spin_lock_irqsave(&dev->vbl_lock, irqflags); for (i = 0; i < dev->num_crtcs; i++) { - wake_up(&dev->vblank[i].queue); - dev->vblank[i].enabled = false; - dev->vblank[i].last = + struct drm_vblank_crtc *vblank = &dev->vblank[i]; + + wake_up(&vblank->queue); + vblank->enabled = false; + vblank->last = dev->driver->get_vblank_counter(dev, i); } spin_unlock_irqrestore(&dev->vbl_lock, irqflags); @@ -796,7 +804,9 @@ 
EXPORT_SYMBOL(drm_get_last_vbltimestamp); */ u32 drm_vblank_count(struct drm_device *dev, int crtc) { - return atomic_read(&dev->vblank[crtc].count); + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; + + return atomic_read(&vblank->count); } EXPORT_SYMBOL(drm_vblank_count); @@ -816,6 +826,7 @@ EXPORT_SYMBOL(drm_vblank_count); u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc, struct timeval *vblanktime) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; u32 cur_vblank; /* Read timestamp from slot of _vblank_time ringbuffer @@ -824,10 +835,10 @@ u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc, * a seqlock. */ do { - cur_vblank = atomic_read(&dev->vblank[crtc].count); + cur_vblank = atomic_read(&vblank->count); *vblanktime = vblanktimestamp(dev, crtc, cur_vblank); smp_rmb(); - } while (cur_vblank != atomic_read(&dev->vblank[crtc].count)); + } while (cur_vblank != atomic_read(&vblank->count)); return cur_vblank; } @@ -882,13 +893,14 @@ EXPORT_SYMBOL(drm_send_vblank_event); */ static int drm_vblank_enable(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; int ret = 0; assert_spin_locked(&dev->vbl_lock); spin_lock(&dev->vblank_time_lock); - if (!dev->vblank[crtc].enabled) { + if (!vblank->enabled) { /* * Enable vblank irqs under vblank_time_lock protection. 
* All vblank count & timestamp updates are held off @@ -899,9 +911,9 @@ static int drm_vblank_enable(struct drm_device *dev, int crtc) ret = dev->driver->enable_vblank(dev, crtc); DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n", crtc, ret); if (ret) - atomic_dec(&dev->vblank[crtc].refcount); + atomic_dec(&vblank->refcount); else { - dev->vblank[crtc].enabled = true; + vblank->enabled = true; drm_update_vblank_count(dev, crtc); } } @@ -926,16 +938,17 @@ static int drm_vblank_enable(struct drm_device *dev, int crtc) */ int drm_vblank_get(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; unsigned long irqflags; int ret = 0; spin_lock_irqsave(&dev->vbl_lock, irqflags); /* Going from 0->1 means we have to enable interrupts again */ - if (atomic_add_return(1, &dev->vblank[crtc].refcount) == 1) { + if (atomic_add_return(1, &vblank->refcount) == 1) { ret = drm_vblank_enable(dev, crtc); } else { - if (!dev->vblank[crtc].enabled) { - atomic_dec(&dev->vblank[crtc].refcount); + if (!vblank->enabled) { + atomic_dec(&vblank->refcount); ret = -EINVAL; } } @@ -975,12 +988,14 @@ EXPORT_SYMBOL(drm_crtc_vblank_get); */ void drm_vblank_put(struct drm_device *dev, int crtc) { - BUG_ON(atomic_read(&dev->vblank[crtc].refcount) == 0); + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; + + BUG_ON(atomic_read(&vblank->refcount) == 0); /* Last user schedules interrupt disable */ - if (atomic_dec_and_test(&dev->vblank[crtc].refcount) && + if (atomic_dec_and_test(&vblank->refcount) && (drm_vblank_offdelay > 0)) - mod_timer(&dev->vblank[crtc].disable_timer, + mod_timer(&vblank->disable_timer, jiffies + ((drm_vblank_offdelay * HZ)/1000)); } EXPORT_SYMBOL(drm_vblank_put); @@ -1016,6 +1031,7 @@ EXPORT_SYMBOL(drm_crtc_vblank_put); */ void drm_vblank_off(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; struct drm_pending_vblank_event *e, *t; struct timeval now; unsigned long irqflags; @@ -1023,7 +1039,7 @@ void 
drm_vblank_off(struct drm_device *dev, int crtc) spin_lock_irqsave(&dev->vbl_lock, irqflags); vblank_disable_and_save(dev, crtc); - wake_up(&dev->vblank[crtc].queue); + wake_up(&vblank->queue); /* Send any queued vblank events, lest the natives grow disquiet */ seq = drm_vblank_count_and_time(dev, crtc, &now); @@ -1045,9 +1061,9 @@ void drm_vblank_off(struct drm_device *dev, int crtc) * Prevent subsequent drm_vblank_get() from re-enabling * the vblank interrupt by bumping the refcount. */ - if (!dev->vblank[crtc].inmodeset) { - atomic_inc(&dev->vblank[crtc].refcount); - dev->vblank[crtc].inmodeset = 1; + if (!vblank->inmodeset) { + atomic_inc(&vblank->refcount); + vblank->inmodeset = 1; } spin_unlock_irqrestore(&dev->vbl_lock, irqflags); @@ -1087,13 +1103,14 @@ EXPORT_SYMBOL(drm_crtc_vblank_off); */ void drm_vblank_on(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; unsigned long irqflags; spin_lock_irqsave(&dev->vbl_lock, irqflags); /* Drop our private "prevent drm_vblank_get" refcount */ - if (dev->vblank[crtc].inmodeset) { - atomic_dec(&dev->vblank[crtc].refcount); - dev->vblank[crtc].inmodeset = 0; + if (vblank->inmodeset) { + atomic_dec(&vblank->refcount); + vblank->inmodeset = 0; } /* @@ -1103,12 +1120,12 @@ void drm_vblank_on(struct drm_device *dev, int crtc) * -1 to make sure user will never see the same * vblank counter value before and after a modeset */ - dev->vblank[crtc].last = + vblank->last = (dev->driver->get_vblank_counter(dev, crtc) - 1) & dev->max_vblank_count; /* re-enable interrupts if there's are users left */ - if (atomic_read(&dev->vblank[crtc].refcount) != 0) + if (atomic_read(&vblank->refcount) != 0) WARN_ON(drm_vblank_enable(dev, crtc)); spin_unlock_irqrestore(&dev->vbl_lock, irqflags); } @@ -1156,6 +1173,8 @@ EXPORT_SYMBOL(drm_crtc_vblank_on); */ void drm_vblank_pre_modeset(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; + /* vblank is not initialized 
(IRQ not installed ?), or has been freed */ if (!dev->num_crtcs) return; @@ -1166,10 +1185,10 @@ void drm_vblank_pre_modeset(struct drm_device *dev, int crtc) * to avoid corrupting the count if multiple, mismatch calls occur), * so that interrupts remain enabled in the interim. */ - if (!dev->vblank[crtc].inmodeset) { - dev->vblank[crtc].inmodeset = 0x1; + if (!vblank->inmodeset) { + vblank->inmodeset = 0x1; if (drm_vblank_get(dev, crtc) == 0) - dev->vblank[crtc].inmodeset |= 0x2; + vblank->inmodeset |= 0x2; } } EXPORT_SYMBOL(drm_vblank_pre_modeset); @@ -1184,21 +1203,22 @@ EXPORT_SYMBOL(drm_vblank_pre_modeset); */ void drm_vblank_post_modeset(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; unsigned long irqflags; /* vblank is not initialized (IRQ not installed ?), or has been freed */ if (!dev->num_crtcs) return; - if (dev->vblank[crtc].inmodeset) { + if (vblank->inmodeset) { spin_lock_irqsave(&dev->vbl_lock, irqflags); dev->vblank_disable_allowed = true; spin_unlock_irqrestore(&dev->vbl_lock, irqflags); - if (dev->vblank[crtc].inmodeset & 0x2) + if (vblank->inmodeset & 0x2) drm_vblank_put(dev, crtc); - dev->vblank[crtc].inmodeset = 0; + vblank->inmodeset = 0; } } EXPORT_SYMBOL(drm_vblank_post_modeset); @@ -1333,6 +1353,7 @@ static int drm_queue_vblank_event(struct drm_device *dev, int pipe, int drm_wait_vblank(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct drm_vblank_crtc *vblank; union drm_wait_vblank *vblwait = data; int ret; unsigned int flags, seq, crtc, high_crtc; @@ -1362,6 +1383,8 @@ int drm_wait_vblank(struct drm_device *dev, void *data, if (crtc >= dev->num_crtcs) return -EINVAL; + vblank = &dev->vblank[crtc]; + ret = drm_vblank_get(dev, crtc); if (ret) { DRM_DEBUG("failed to acquire vblank counter, %d\n", ret); @@ -1394,11 +1417,11 @@ int drm_wait_vblank(struct drm_device *dev, void *data, DRM_DEBUG("waiting on vblank count %d, crtc %d\n", vblwait->request.sequence, crtc); - 
dev->vblank[crtc].last_wait = vblwait->request.sequence; - DRM_WAIT_ON(ret, dev->vblank[crtc].queue, 3 * HZ, + vblank->last_wait = vblwait->request.sequence; + DRM_WAIT_ON(ret, vblank->queue, 3 * HZ, (((drm_vblank_count(dev, crtc) - vblwait->request.sequence) <= (1 << 23)) || - !dev->vblank[crtc].enabled || + !vblank->enabled || !dev->irq_enabled)); if (ret != -EINTR) { @@ -1459,6 +1482,7 @@ static void drm_handle_vblank_events(struct drm_device *dev, int crtc) */ bool drm_handle_vblank(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; u32 vblcount; s64 diff_ns; struct timeval tvblank; @@ -1474,7 +1498,7 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) spin_lock_irqsave(&dev->vblank_time_lock, irqflags); /* Vblank irq handling disabled. Nothing to do. */ - if (!dev->vblank[crtc].enabled) { + if (!vblank->enabled) { spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); return false; } @@ -1484,7 +1508,7 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) */ /* Get current timestamp and count. */ - vblcount = atomic_read(&dev->vblank[crtc].count); + vblcount = atomic_read(&vblank->count); drm_get_last_vbltimestamp(dev, crtc, &tvblank, DRM_CALLED_FROM_VBLIRQ); /* Compute time difference to timestamp of last vblank */ @@ -1508,14 +1532,14 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) * the timestamp computed above. */ smp_mb__before_atomic(); - atomic_inc(&dev->vblank[crtc].count); + atomic_inc(&vblank->count); smp_mb__after_atomic(); } else { DRM_DEBUG("crtc %d: Redundant vblirq ignored. 
diff_ns = %d\n", crtc, (int) diff_ns); } - wake_up(&dev->vblank[crtc].queue); + wake_up(&vblank->queue); drm_handle_vblank_events(dev, crtc); spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); -- GitLab From 56cc279b29c7b204fe7d0943509ae209b8b128db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:51 +0300 Subject: [PATCH 0013/1868] drm: Fix deadlock between event_lock and vbl_lock/vblank_time_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently both drm_irq.c and several drivers call drm_vblank_put() while holding event_lock. Now that drm_vblank_put() can disable the vblank interrupt directly it may need to grab vbl_lock and vblank_time_lock. That causes deadlocks since we take the locks in the opposite order in two places in drm_irq.c. So let's make sure the locking order is always event_lock->vbl_lock->vblank_time_lock. In drm_vblank_off() pull up event_lock from underneath vbl_lock. Hold the event_lock across the whole operation to make sure we only send out the events that were on the queue when we disabled the interrupt, and not ones that got added just after (assuming drm_vblank_on() already managed to get called somewhere between). To sort the other deadlock pull the event_lock out from drm_handle_vblank_events() into drm_handle_vblank() to be taken outside vblank_time_lock. Add the appropriate assert_spin_locked() to drm_handle_vblank_events(). 
Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 47 +++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index b4460bf0e0e4..9353609c6770 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1037,14 +1037,25 @@ void drm_vblank_off(struct drm_device *dev, int crtc) unsigned long irqflags; unsigned int seq; - spin_lock_irqsave(&dev->vbl_lock, irqflags); + spin_lock_irqsave(&dev->event_lock, irqflags); + + spin_lock(&dev->vbl_lock); vblank_disable_and_save(dev, crtc); wake_up(&vblank->queue); + /* + * Prevent subsequent drm_vblank_get() from re-enabling + * the vblank interrupt by bumping the refcount. + */ + if (!vblank->inmodeset) { + atomic_inc(&vblank->refcount); + vblank->inmodeset = 1; + } + spin_unlock(&dev->vbl_lock); + /* Send any queued vblank events, lest the natives grow disquiet */ seq = drm_vblank_count_and_time(dev, crtc, &now); - spin_lock(&dev->event_lock); list_for_each_entry_safe(e, t, &dev->vblank_event_list, base.link) { if (e->pipe != crtc) continue; @@ -1055,18 +1066,7 @@ void drm_vblank_off(struct drm_device *dev, int crtc) drm_vblank_put(dev, e->pipe); send_vblank_event(dev, e, seq, &now); } - spin_unlock(&dev->event_lock); - - /* - * Prevent subsequent drm_vblank_get() from re-enabling - * the vblank interrupt by bumping the refcount. 
- */ - if (!vblank->inmodeset) { - atomic_inc(&vblank->refcount); - vblank->inmodeset = 1; - } - - spin_unlock_irqrestore(&dev->vbl_lock, irqflags); + spin_unlock_irqrestore(&dev->event_lock, irqflags); } EXPORT_SYMBOL(drm_vblank_off); @@ -1446,12 +1446,11 @@ static void drm_handle_vblank_events(struct drm_device *dev, int crtc) { struct drm_pending_vblank_event *e, *t; struct timeval now; - unsigned long flags; unsigned int seq; - seq = drm_vblank_count_and_time(dev, crtc, &now); + assert_spin_locked(&dev->event_lock); - spin_lock_irqsave(&dev->event_lock, flags); + seq = drm_vblank_count_and_time(dev, crtc, &now); list_for_each_entry_safe(e, t, &dev->vblank_event_list, base.link) { if (e->pipe != crtc) @@ -1467,8 +1466,6 @@ static void drm_handle_vblank_events(struct drm_device *dev, int crtc) send_vblank_event(dev, e, seq, &now); } - spin_unlock_irqrestore(&dev->event_lock, flags); - trace_drm_vblank_event(crtc, seq); } @@ -1491,15 +1488,18 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) if (!dev->num_crtcs) return false; + spin_lock_irqsave(&dev->event_lock, irqflags); + /* Need timestamp lock to prevent concurrent execution with * vblank enable/disable, as this would cause inconsistent * or corrupted timestamps and vblank counts. */ - spin_lock_irqsave(&dev->vblank_time_lock, irqflags); + spin_lock(&dev->vblank_time_lock); /* Vblank irq handling disabled. Nothing to do. 
*/ if (!vblank->enabled) { - spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); + spin_unlock(&dev->vblank_time_lock); + spin_unlock_irqrestore(&dev->event_lock, irqflags); return false; } @@ -1539,10 +1539,13 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) crtc, (int) diff_ns); } + spin_unlock(&dev->vblank_time_lock); + wake_up(&vblank->queue); drm_handle_vblank_events(dev, crtc); - spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); + spin_unlock_irqrestore(&dev->event_lock, irqflags); + return true; } EXPORT_SYMBOL(drm_handle_vblank); -- GitLab From ffe7c73a8d4f0caeebd5d220ddbf7126a4daca1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:52 +0300 Subject: [PATCH 0014/1868] drm: Fix race between drm_vblank_off() and drm_queue_vblank_event() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently it's possible that the following will happen: 1. drm_wait_vblank() calls drm_vblank_get() 2. drm_vblank_off() gets called 3. drm_wait_vblank() calls drm_queue_vblank_event() which adds the event to the queue even though vblank interrupts are currently disabled (and may not be re-enabled ever again). To fix the problem, add another vblank->enabled check into drm_queue_vblank_event(). drm_vblank_off() holds event_lock around the vblank disable, so no further locking needs to be added to drm_queue_vblank_event(). vblank disable from another source is not possible since drm_wait_vblank() already holds a vblank reference. 
Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 9353609c6770..b2428cb0c64d 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1270,6 +1270,7 @@ static int drm_queue_vblank_event(struct drm_device *dev, int pipe, union drm_wait_vblank *vblwait, struct drm_file *file_priv) { + struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; struct drm_pending_vblank_event *e; struct timeval now; unsigned long flags; @@ -1293,6 +1294,18 @@ static int drm_queue_vblank_event(struct drm_device *dev, int pipe, spin_lock_irqsave(&dev->event_lock, flags); + /* + * drm_vblank_off() might have been called after we called + * drm_vblank_get(). drm_vblank_off() holds event_lock + * around the vblank disable, so no need for further locking. + * The reference from drm_vblank_get() protects against + * vblank disable from another source. + */ + if (!vblank->enabled) { + ret = -EINVAL; + goto err_unlock; + } + if (file_priv->event_space < sizeof e->event) { ret = -EBUSY; goto err_unlock; -- GitLab From 4ed0ce3d0bccd74416ba6beb33a8a79d1617e97b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:53 +0300 Subject: [PATCH 0015/1868] drm: Disable vblank interrupt immediately when drm_vblank_offdelay<0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make drm_vblank_put() disable the vblank interrupt immediately when the refcount drops to zero and drm_vblank_offdelay<0. 
v2: Preserve the current drm_vblank_offdelay==0 'never disable' behaviour Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 1 + drivers/gpu/drm/drm_drv.c | 4 ++-- drivers/gpu/drm/drm_irq.c | 11 +++++++---- include/drm/drmP.h | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 1d3756d3176c..55923d00bd52 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3386,6 +3386,7 @@ void (*disable_vblank) (struct drm_device *dev, int crtc); by scheduling a timer. The delay is accessible through the vblankoffdelay module parameter or the drm_vblank_offdelay global variable and expressed in milliseconds. Its default value is 5000 ms. + Zero means never disable, and a negative value means disable immediately. When a vertical blanking interrupt occurs drivers only need to call the diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 92bc6b1d9646..db03e16ca817 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -39,7 +39,7 @@ unsigned int drm_debug = 0; /* 1 to enable debug output */ EXPORT_SYMBOL(drm_debug); -unsigned int drm_vblank_offdelay = 5000; /* Default to 5000 msecs. */ +int drm_vblank_offdelay = 5000; /* Default to 5000 msecs. */ unsigned int drm_timestamp_precision = 20; /* Default to 20 usecs. */ @@ -53,7 +53,7 @@ MODULE_AUTHOR(CORE_AUTHOR); MODULE_DESCRIPTION(CORE_DESC); MODULE_LICENSE("GPL and additional rights"); MODULE_PARM_DESC(debug, "Enable debug output"); -MODULE_PARM_DESC(vblankoffdelay, "Delay until vblank irq auto-disable [msecs]"); +MODULE_PARM_DESC(vblankoffdelay, "Delay until vblank irq auto-disable [msecs] (0: never disable, <0: disable immediately)"); MODULE_PARM_DESC(timestamp_precision_usec, "Max. 
error on timestamps [usecs]"); MODULE_PARM_DESC(timestamp_monotonic, "Use monotonic timestamps"); diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index b2428cb0c64d..99145c4d536b 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -993,10 +993,13 @@ void drm_vblank_put(struct drm_device *dev, int crtc) BUG_ON(atomic_read(&vblank->refcount) == 0); /* Last user schedules interrupt disable */ - if (atomic_dec_and_test(&vblank->refcount) && - (drm_vblank_offdelay > 0)) - mod_timer(&vblank->disable_timer, - jiffies + ((drm_vblank_offdelay * HZ)/1000)); + if (atomic_dec_and_test(&vblank->refcount)) { + if (drm_vblank_offdelay < 0) + vblank_disable_fn((unsigned long)vblank); + else if (drm_vblank_offdelay > 0) + mod_timer(&vblank->disable_timer, + jiffies + ((drm_vblank_offdelay * HZ)/1000)); + } } EXPORT_SYMBOL(drm_vblank_put); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index a57646382086..24b32d453c60 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1345,7 +1345,7 @@ extern void drm_put_dev(struct drm_device *dev); extern void drm_unplug_dev(struct drm_device *dev); extern unsigned int drm_debug; -extern unsigned int drm_vblank_offdelay; +extern int drm_vblank_offdelay; extern unsigned int drm_timestamp_precision; extern unsigned int drm_timestamp_monotonic; -- GitLab From 00185e667009dda907887a4f84fbd02c6e651a49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:54 +0300 Subject: [PATCH 0016/1868] drm: Add dev->vblank_disable_immediate flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a flag to drm_device which will cause the vblank code to bypass the disable timer and always disable the vblank interrupt immediately when the last reference is dropped. 
v2: Add some notes about the flag to the kernel doc Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 6 ++++++ drivers/gpu/drm/drm_irq.c | 2 +- include/drm/drmP.h | 10 ++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 55923d00bd52..583edbffff1a 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3387,6 +3387,12 @@ void (*disable_vblank) (struct drm_device *dev, int crtc); module parameter or the drm_vblank_offdelay global variable and expressed in milliseconds. Its default value is 5000 ms. Zero means never disable, and a negative value means disable immediately. + Drivers may override the behaviour by setting the + drm_device + vblank_disable_immediate flag, which when set + causes vblank interrupts to be disabled immediately regardless of the + drm_vblank_offdelay value. The flag should only be set if there's a + properly working hardware vblank counter present. 
When a vertical blanking interrupt occurs drivers only need to call the diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 99145c4d536b..8dbcc3f892d5 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -994,7 +994,7 @@ void drm_vblank_put(struct drm_device *dev, int crtc) /* Last user schedules interrupt disable */ if (atomic_dec_and_test(&vblank->refcount)) { - if (drm_vblank_offdelay < 0) + if (dev->vblank_disable_immediate || drm_vblank_offdelay < 0) vblank_disable_fn((unsigned long)vblank); else if (drm_vblank_offdelay > 0) mod_timer(&vblank->disable_timer, diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 24b32d453c60..17a5c10474bd 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1074,6 +1074,16 @@ struct drm_device { */ bool vblank_disable_allowed; + /* + * If true, vblank interrupt will be disabled immediately when the + * refcount drops to zero, as opposed to via the vblank disable + * timer. + * This can be set to true it the hardware has a working vblank + * counter and the driver uses drm_vblank_on() and drm_vblank_off() + * appropriately. + */ + bool vblank_disable_immediate; + /* array of size num_crtcs */ struct drm_vblank_crtc *vblank; -- GitLab From 21da27005f79d72499bb809616b15fd2c5c15319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:55 +0300 Subject: [PATCH 0017/1868] drm/i915: Opt out of vblank disable timer on >gen2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the vblank races are plugged, we can opt out of using the vblank disable timer and just let vblank interrupts get disabled immediately when the last reference is dropped. Gen2 is the exception since it has no hardware frame counter. 
Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6ef9d6fabf80..845f0f6c1eeb 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4680,6 +4680,14 @@ void intel_irq_init(struct drm_device *dev) dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ } + /* + * Opt out of the vblank disable timer on everything except gen2. + * Gen2 doesn't have a hardware frame counter and so depends on + * vblank interrupts to produce sane vblank seuquence numbers. + */ + if (!IS_GEN2(dev)) + dev->vblank_disable_immediate = true; + if (drm_core_check_feature(dev, DRIVER_MODESET)) { dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; -- GitLab From cd19e52aee922ffe5c50b6ed67acd58cc1b2738b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:56 +0300 Subject: [PATCH 0018/1868] drm: Kick start vblank interrupts at drm_vblank_on() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the user is interested in getting accurate vblank sequence numbers all the time they may disable the vblank disable timer entirely. In that case it seems appropriate to kick start the vblank interrupts already from drm_vblank_on(). 
v2: Adapt to the drm_vblank_offdelay ==0 vs <0 changes Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 8dbcc3f892d5..af33df1adc6d 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1126,9 +1126,12 @@ void drm_vblank_on(struct drm_device *dev, int crtc) vblank->last = (dev->driver->get_vblank_counter(dev, crtc) - 1) & dev->max_vblank_count; - - /* re-enable interrupts if there's are users left */ - if (atomic_read(&vblank->refcount) != 0) + /* + * re-enable interrupts if there are users left, or the + * user wishes vblank interrupts to be enabled all the time. + */ + if (atomic_read(&vblank->refcount) != 0 || + (!dev->vblank_disable_immediate && drm_vblank_offdelay == 0)) WARN_ON(drm_vblank_enable(dev, crtc)); spin_unlock_irqrestore(&dev->vbl_lock, irqflags); } -- GitLab From d297e1037327884fe9545f434d720fd3e8f18c80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:50:01 +0300 Subject: [PATCH 0019/1868] drm/i915: Update scanline_offset only for active crtcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit update_scanline_offset() in intel_sanitize_crtc() was supposed to be called only for active crtcs. But due to some underrun patches it now gets updated for all crtcs on gmch platforms. Move the update_scanline_offset() to the very beginning of intel_sanitize_crtc() where we update the vblank state. This seems like a better place anyway since the scanline offset ought to be up to date before we might need to consult it. So before any vblanky stuff happens. 
Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8f6b932d8e79..de40a44e0ca0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12719,9 +12719,10 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) I915_WRITE(reg, I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK); /* restore vblank interrupts to correct state */ - if (crtc->active) + if (crtc->active) { + update_scanline_offset(crtc); drm_vblank_on(dev, crtc->pipe); - else + } else drm_vblank_off(dev, crtc->pipe); /* We need to sanitize the plane -> pipe mapping first because this will @@ -12820,8 +12821,6 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) */ crtc->cpu_fifo_underrun_disabled = true; crtc->pch_fifo_underrun_disabled = true; - - update_scanline_offset(crtc); } } -- GitLab From 96a9fdd778037799f63c9ae272ec915dd3ad83dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:50:02 +0300 Subject: [PATCH 0020/1868] drm: Fix confusing debug message in drm_update_vblank_count() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that drm_update_vblank_count() can be called even when we're not about to enable the vblank interrupts we shouldn't print debug messages stating otherwise. 
Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index af33df1adc6d..62dee812d28a 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -103,7 +103,7 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc) crtc, vblank->last, cur_vblank, diff); } - DRM_DEBUG("enabling vblank interrupts on crtc %d, missed %d\n", + DRM_DEBUG("updating vblank count on crtc %d, missed %d\n", crtc, diff); /* Reinitialize corresponding vblank timestamp if high-precision query -- GitLab From c50d7521617d823d769b280bc499e19e364434ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:59 +0300 Subject: [PATCH 0021/1868] drm: Store the vblank timestamp when adjusting the counter during disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During vblank disable the code tries to guess based on the timestamps whether we just missed one vblank or not. And if so it increments the counter. However it forgets to store the new timestamp to the appropriate slot in our timestamp ring buffer. So anyone querying the timestamp for the resulting sequence number would get a stale timestamp. Fix it up by storing the new timestamp. Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 62dee812d28a..aa9b06495067 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -198,6 +198,13 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) * hope for the best. */ if ((vblrc > 0) && (abs64(diff_ns) > 1000000)) { + /* Store new timestamp in ringbuffer. */ + vblanktimestamp(dev, crtc, vblcount + 1) = tvblank; + + /* Increment cooked vblank count. 
This also atomically commits + * the timestamp computed above. + */ + smp_mb__before_atomic(); atomic_inc(&vblank->count); smp_mb__after_atomic(); } -- GitLab From 79a093aea44f11fda0a5b4dbe4c1e29b2f586f4e Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Wed, 6 Aug 2014 03:22:44 +0200 Subject: [PATCH 0022/1868] drm: Remove drm_vblank_cleanup from drm_vblank_init error path. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_vblank_cleanup() would operate on non-existent dev->vblank data structure, as failure to allocate that data structure is what triggers the error path in the first place. Signed-off-by: Mario Kleiner Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index aa9b06495067..6473089e5fd3 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -303,7 +303,7 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs) return 0; err: - drm_vblank_cleanup(dev); + dev->num_crtcs = 0; return ret; } EXPORT_SYMBOL(drm_vblank_init); -- GitLab From f769cd247d2be5af377adf82882eddd1dce183c4 Mon Sep 17 00:00:00 2001 From: Vandana Kannan Date: Tue, 5 Aug 2014 07:51:22 -0700 Subject: [PATCH 0023/1868] drm/i915: Set M2_N2 registers during mode set For Gen < 8, set M2_N2 registers on every mode set. This is required to make sure M2_N2 registers are set during boot, resume from sleep for cross- checking the state. The register is set only if DRRS is supported. 
v2: Patch rebased v3: Daniel's review comments - Removed HAS_DRRS(dev) and added bool has_drrs to pipe_config to track drrs support v4: Jesse's review comments - Made changes to set m2_n2 in intel_dp_set_m_n() Signed-off-by: Vandana Kannan Cc: Daniel Vetter Cc: Jesse Barnes Signed-off-by: Rodrigo Vivi Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 28 +++++++++++++++++++++------- drivers/gpu/drm/i915/intel_dp.c | 18 +++--------------- drivers/gpu/drm/i915/intel_drv.h | 3 ++- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 018fb7222f60..acee1416eb93 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -91,11 +91,11 @@ static int intel_framebuffer_init(struct drm_device *dev, struct intel_framebuffer *ifb, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_i915_gem_object *obj); -static void intel_dp_set_m_n(struct intel_crtc *crtc); static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc); static void intel_set_pipe_timings(struct intel_crtc *intel_crtc); static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc, - struct intel_link_m_n *m_n); + struct intel_link_m_n *m_n, + struct intel_link_m_n *m2_n2); static void ironlake_set_pipeconf(struct drm_crtc *crtc); static void haswell_set_pipeconf(struct drm_crtc *crtc); static void intel_set_pipe_csc(struct drm_crtc *crtc); @@ -3980,7 +3980,7 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) if (intel_crtc->config.has_pch_encoder) { intel_cpu_transcoder_set_m_n(intel_crtc, - &intel_crtc->config.fdi_m_n); + &intel_crtc->config.fdi_m_n, NULL); } ironlake_set_pipeconf(crtc); @@ -4093,7 +4093,7 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) if (intel_crtc->config.has_pch_encoder) { intel_cpu_transcoder_set_m_n(intel_crtc, - &intel_crtc->config.fdi_m_n); + &intel_crtc->config.fdi_m_n, NULL); } 
haswell_set_pipeconf(crtc); @@ -5509,7 +5509,8 @@ static void intel_pch_transcoder_set_m_n(struct intel_crtc *crtc, } static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc, - struct intel_link_m_n *m_n) + struct intel_link_m_n *m_n, + struct intel_link_m_n *m2_n2) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -5521,6 +5522,18 @@ static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc, I915_WRITE(PIPE_DATA_N1(transcoder), m_n->gmch_n); I915_WRITE(PIPE_LINK_M1(transcoder), m_n->link_m); I915_WRITE(PIPE_LINK_N1(transcoder), m_n->link_n); + /* M2_N2 registers to be set only for gen < 8 (M2_N2 available + * for gen < 8) and if DRRS is supported (to make sure the + * registers are not unnecessarily accessed). + */ + if (m2_n2 && INTEL_INFO(dev)->gen < 8 && + crtc->config.has_drrs) { + I915_WRITE(PIPE_DATA_M2(transcoder), + TU_SIZE(m2_n2->tu) | m2_n2->gmch_m); + I915_WRITE(PIPE_DATA_N2(transcoder), m2_n2->gmch_n); + I915_WRITE(PIPE_LINK_M2(transcoder), m2_n2->link_m); + I915_WRITE(PIPE_LINK_N2(transcoder), m2_n2->link_n); + } } else { I915_WRITE(PIPE_DATA_M_G4X(pipe), TU_SIZE(m_n->tu) | m_n->gmch_m); I915_WRITE(PIPE_DATA_N_G4X(pipe), m_n->gmch_n); @@ -5529,12 +5542,13 @@ static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc, } } -static void intel_dp_set_m_n(struct intel_crtc *crtc) +void intel_dp_set_m_n(struct intel_crtc *crtc) { if (crtc->config.has_pch_encoder) intel_pch_transcoder_set_m_n(crtc, &crtc->config.dp_m_n); else - intel_cpu_transcoder_set_m_n(crtc, &crtc->config.dp_m_n); + intel_cpu_transcoder_set_m_n(crtc, &crtc->config.dp_m_n, + &crtc->config.dp_m2_n2); } static void vlv_update_pll(struct intel_crtc *crtc) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 805b6f63df0f..3ea5cef9bbe6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -828,20 +828,6 @@ intel_dp_set_clock(struct intel_encoder *encoder, } } 
-static void -intel_dp_set_m2_n2(struct intel_crtc *crtc, struct intel_link_m_n *m_n) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - enum transcoder transcoder = crtc->config.cpu_transcoder; - - I915_WRITE(PIPE_DATA_M2(transcoder), - TU_SIZE(m_n->tu) | m_n->gmch_m); - I915_WRITE(PIPE_DATA_N2(transcoder), m_n->gmch_n); - I915_WRITE(PIPE_LINK_M2(transcoder), m_n->link_m); - I915_WRITE(PIPE_LINK_N2(transcoder), m_n->link_n); -} - bool intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_config *pipe_config) @@ -867,6 +853,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = true; pipe_config->has_dp_encoder = true; + pipe_config->has_drrs = false; pipe_config->has_audio = intel_dp->has_audio; if (is_edp(intel_dp) && intel_connector->panel.fixed_mode) { @@ -970,6 +957,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (intel_connector->panel.downclock_mode != NULL && intel_dp->drrs_state.type == SEAMLESS_DRRS_SUPPORT) { + pipe_config->has_drrs = true; intel_link_compute_m_n(bpp, lane_count, intel_connector->panel.downclock_mode->clock, pipe_config->port_clock, @@ -4389,7 +4377,7 @@ void intel_dp_set_drrs_state(struct drm_device *dev, int refresh_rate) val = I915_READ(reg); if (index > DRRS_HIGH_RR) { val |= PIPECONF_EDP_RR_MODE_SWITCH; - intel_dp_set_m2_n2(intel_crtc, &config->dp_m2_n2); + intel_dp_set_m_n(intel_crtc); } else { val &= ~PIPECONF_EDP_RR_MODE_SWITCH; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 4b2664bd5b81..7a3cac095afe 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -324,6 +324,7 @@ struct intel_crtc_config { /* m2_n2 for eDP downclock */ struct intel_link_m_n dp_m2_n2; + bool has_drrs; /* * Frequence the dpll for the port should run at. 
Differs from the @@ -877,6 +878,7 @@ void hsw_enable_pc8(struct drm_i915_private *dev_priv); void hsw_disable_pc8(struct drm_i915_private *dev_priv); void intel_dp_get_m_n(struct intel_crtc *crtc, struct intel_crtc_config *pipe_config); +void intel_dp_set_m_n(struct intel_crtc *crtc); int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n); void ironlake_check_encoder_dotclock(const struct intel_crtc_config *pipe_config, @@ -892,7 +894,6 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode, int intel_format_to_fourcc(int format); void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc); - /* intel_dp.c */ void intel_dp_init(struct drm_device *dev, int output_reg, enum port port); bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, -- GitLab From b95af8bee524974768681b2b92235e1e1e21bf1a Mon Sep 17 00:00:00 2001 From: Vandana Kannan Date: Tue, 5 Aug 2014 07:51:23 -0700 Subject: [PATCH 0024/1868] drm/i915: State readout and cross-checking for dp_m2_n2 Adding relevant read out comparison code, in check_crtc_state, for the new member of crtc_config, dp_m2_n2, which was introduced to store link_m_n values for a DP downclock mode (if available). Suggested by Daniel. v2: Changed patch title. Daniel's review comments incorporated. Added relevant state readout code for M2_N2. dp_m2_n2 comparison to be done only when high RR is not in use (This is because alternate m_n register programming will be done only when low RR is being used). v3: Modified call to get_m2_n2 which had dp_m_n as param by mistake. Compare dp_m_n and dp_m2_n2 for gen 7 and below. compare the structures based on DRRS state for gen 8 and above. Save and restore M2 N2 registers for gen 7 and below v4: For Gen>=8, check M_N registers against dp_m_n and dp_m2_n2 as there is only one set of M_N registers v5: Removed the chunk which saves and restores M2_N2 registers. Modified get_m_n() to get M2_N2 registers as well. 
Modified the macro which compares hw.dp_m_n against sw.dp_m2_n2/sw.dp_m_n for gen > 8. v6: Added check to compare dp_m2_n2 only when DRRS is enabled v7: Modified drrs check to use has_drrs v8: Add has_drrs check before reading M2_N2 registers Signed-off-by: Vandana Kannan Cc: Daniel Vetter Cc: Jani Nikula Cc: Jesse Barnes Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 75 +++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index acee1416eb93..620a89961d36 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7149,7 +7149,8 @@ static void intel_pch_transcoder_get_m_n(struct intel_crtc *crtc, static void intel_cpu_transcoder_get_m_n(struct intel_crtc *crtc, enum transcoder transcoder, - struct intel_link_m_n *m_n) + struct intel_link_m_n *m_n, + struct intel_link_m_n *m2_n2) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -7163,6 +7164,20 @@ static void intel_cpu_transcoder_get_m_n(struct intel_crtc *crtc, m_n->gmch_n = I915_READ(PIPE_DATA_N1(transcoder)); m_n->tu = ((I915_READ(PIPE_DATA_M1(transcoder)) & TU_SIZE_MASK) >> TU_SIZE_SHIFT) + 1; + /* Read M2_N2 registers only for gen < 8 (M2_N2 available for + * gen < 8) and if DRRS is supported (to make sure the + * registers are not unnecessarily read). 
+ */ + if (m2_n2 && INTEL_INFO(dev)->gen < 8 && + crtc->config.has_drrs) { + m2_n2->link_m = I915_READ(PIPE_LINK_M2(transcoder)); + m2_n2->link_n = I915_READ(PIPE_LINK_N2(transcoder)); + m2_n2->gmch_m = I915_READ(PIPE_DATA_M2(transcoder)) + & ~TU_SIZE_MASK; + m2_n2->gmch_n = I915_READ(PIPE_DATA_N2(transcoder)); + m2_n2->tu = ((I915_READ(PIPE_DATA_M2(transcoder)) + & TU_SIZE_MASK) >> TU_SIZE_SHIFT) + 1; + } } else { m_n->link_m = I915_READ(PIPE_LINK_M_G4X(pipe)); m_n->link_n = I915_READ(PIPE_LINK_N_G4X(pipe)); @@ -7181,14 +7196,15 @@ void intel_dp_get_m_n(struct intel_crtc *crtc, intel_pch_transcoder_get_m_n(crtc, &pipe_config->dp_m_n); else intel_cpu_transcoder_get_m_n(crtc, pipe_config->cpu_transcoder, - &pipe_config->dp_m_n); + &pipe_config->dp_m_n, + &pipe_config->dp_m2_n2); } static void ironlake_get_fdi_m_n_config(struct intel_crtc *crtc, struct intel_crtc_config *pipe_config) { intel_cpu_transcoder_get_m_n(crtc, pipe_config->cpu_transcoder, - &pipe_config->fdi_m_n); + &pipe_config->fdi_m_n, NULL); } static void ironlake_get_pfit_config(struct intel_crtc *crtc, @@ -10005,6 +10021,15 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, pipe_config->dp_m_n.gmch_m, pipe_config->dp_m_n.gmch_n, pipe_config->dp_m_n.link_m, pipe_config->dp_m_n.link_n, pipe_config->dp_m_n.tu); + + DRM_DEBUG_KMS("dp: %i, gmch_m2: %u, gmch_n2: %u, link_m2: %u, link_n2: %u, tu2: %u\n", + pipe_config->has_dp_encoder, + pipe_config->dp_m2_n2.gmch_m, + pipe_config->dp_m2_n2.gmch_n, + pipe_config->dp_m2_n2.link_m, + pipe_config->dp_m2_n2.link_n, + pipe_config->dp_m2_n2.tu); + DRM_DEBUG_KMS("requested mode:\n"); drm_mode_debug_printmodeline(&pipe_config->requested_mode); DRM_DEBUG_KMS("adjusted mode:\n"); @@ -10385,6 +10410,22 @@ intel_pipe_config_compare(struct drm_device *dev, return false; \ } +/* This is required for BDW+ where there is only one set of registers for + * switching between high and low RR. 
+ * This macro can be used whenever a comparison has to be made between one + * hw state and multiple sw state variables. + */ +#define PIPE_CONF_CHECK_I_ALT(name, alt_name) \ + if ((current_config->name != pipe_config->name) && \ + (current_config->alt_name != pipe_config->name)) { \ + DRM_ERROR("mismatch in " #name " " \ + "(expected %i or %i, found %i)\n", \ + current_config->name, \ + current_config->alt_name, \ + pipe_config->name); \ + return false; \ + } + #define PIPE_CONF_CHECK_FLAGS(name, mask) \ if ((current_config->name ^ pipe_config->name) & (mask)) { \ DRM_ERROR("mismatch in " #name "(" #mask ") " \ @@ -10417,11 +10458,28 @@ intel_pipe_config_compare(struct drm_device *dev, PIPE_CONF_CHECK_I(fdi_m_n.tu); PIPE_CONF_CHECK_I(has_dp_encoder); - PIPE_CONF_CHECK_I(dp_m_n.gmch_m); - PIPE_CONF_CHECK_I(dp_m_n.gmch_n); - PIPE_CONF_CHECK_I(dp_m_n.link_m); - PIPE_CONF_CHECK_I(dp_m_n.link_n); - PIPE_CONF_CHECK_I(dp_m_n.tu); + + if (INTEL_INFO(dev)->gen < 8) { + PIPE_CONF_CHECK_I(dp_m_n.gmch_m); + PIPE_CONF_CHECK_I(dp_m_n.gmch_n); + PIPE_CONF_CHECK_I(dp_m_n.link_m); + PIPE_CONF_CHECK_I(dp_m_n.link_n); + PIPE_CONF_CHECK_I(dp_m_n.tu); + + if (current_config->has_drrs) { + PIPE_CONF_CHECK_I(dp_m2_n2.gmch_m); + PIPE_CONF_CHECK_I(dp_m2_n2.gmch_n); + PIPE_CONF_CHECK_I(dp_m2_n2.link_m); + PIPE_CONF_CHECK_I(dp_m2_n2.link_n); + PIPE_CONF_CHECK_I(dp_m2_n2.tu); + } + } else { + PIPE_CONF_CHECK_I_ALT(dp_m_n.gmch_m, dp_m2_n2.gmch_m); + PIPE_CONF_CHECK_I_ALT(dp_m_n.gmch_n, dp_m2_n2.gmch_n); + PIPE_CONF_CHECK_I_ALT(dp_m_n.link_m, dp_m2_n2.link_m); + PIPE_CONF_CHECK_I_ALT(dp_m_n.link_n, dp_m2_n2.link_n); + PIPE_CONF_CHECK_I_ALT(dp_m_n.tu, dp_m2_n2.tu); + } PIPE_CONF_CHECK_I(adjusted_mode.crtc_hdisplay); PIPE_CONF_CHECK_I(adjusted_mode.crtc_htotal); @@ -10507,6 +10565,7 @@ intel_pipe_config_compare(struct drm_device *dev, #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I +#undef PIPE_CONF_CHECK_I_ALT #undef PIPE_CONF_CHECK_FLAGS #undef PIPE_CONF_CHECK_CLOCK_FUZZY #undef 
PIPE_CONF_QUIRK -- GitLab From 020178a1bcadf20b9d057988984f374c905d542e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 22 May 2014 19:36:03 +0300 Subject: [PATCH 0025/1868] drm: Add drm_crtc_vblank_waitqueue() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a small static inline helper to grab the vblank wait queue based on the drm_crtc. This is useful for drivers to do internal vblank waits using wait_event() & co. v2: Pimp commit message (Daniel) Add kernel doc (Daniel) Suggested-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 1 + include/drm/drmP.h | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 1d3756d3176c..972759489376 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3400,6 +3400,7 @@ void (*disable_vblank) (struct drm_device *dev, int crtc); Vertical Blanking and Interrupt Handling Functions Reference !Edrivers/gpu/drm/drm_irq.c +!Iinclude/drm/drmP.h drm_crtc_vblank_waitqueue diff --git a/include/drm/drmP.h b/include/drm/drmP.h index d3d9be6b83ef..bb44c1ee557d 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1344,6 +1344,17 @@ extern int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, extern void drm_calc_timestamping_constants(struct drm_crtc *crtc, const struct drm_display_mode *mode); +/** + * drm_crtc_vblank_waitqueue - get vblank waitqueue for the CRTC + * @crtc: which CRTC's vblank waitqueue to retrieve + * + * This function returns a pointer to the vblank waitqueue for the CRTC. + * Drivers can use this to implement vblank waits using wait_event() & co. 
+ */ +static inline wait_queue_head_t *drm_crtc_vblank_waitqueue(struct drm_crtc *crtc) +{ + return &crtc->dev->vblank[drm_crtc_index(crtc)].queue; +} /* Modesetting support */ extern void drm_vblank_pre_modeset(struct drm_device *dev, int crtc); -- GitLab From 210871b67cd201c198b61ca80e1c51cd4b58c051 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 22 May 2014 19:00:50 +0300 Subject: [PATCH 0026/1868] drm/i915: Kill intel_crtc->vbl_wait MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Share the waitqueue that drm_irq uses when performing the vblank evade trick for atomic pipe updates. v2: Keep intel_pipe_handle_vblank() (Chris) Suggested-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 5 ----- drivers/gpu/drm/i915/intel_display.c | 2 -- drivers/gpu/drm/i915/intel_drv.h | 2 -- drivers/gpu/drm/i915/intel_sprite.c | 5 +++-- 4 files changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 390ccc2a3096..0e44c433cfc3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1989,14 +1989,9 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) static bool intel_pipe_handle_vblank(struct drm_device *dev, enum pipe pipe) { - struct intel_crtc *crtc; - if (!drm_handle_vblank(dev, pipe)) return false; - crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe)); - wake_up(&crtc->vbl_wait); - return true; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 620a89961d36..a9b351d1ff88 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11870,8 +11870,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_crtc->cursor_base = ~0; intel_crtc->cursor_cntl = ~0; - init_waitqueue_head(&intel_crtc->vbl_wait); - 
BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = &intel_crtc->base; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7a3cac095afe..3198de3007be 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -425,8 +425,6 @@ struct intel_crtc { struct intel_pipe_wm active; } wm; - wait_queue_head_t vbl_wait; - int scanline_offset; struct intel_mmio_flip mmio_flip; }; diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 168c6652cda1..d34a5696ffb6 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -53,6 +53,7 @@ static bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl enum pipe pipe = crtc->pipe; long timeout = msecs_to_jiffies_timeout(1); int scanline, min, max, vblank_start; + wait_queue_head_t *wq = drm_crtc_vblank_waitqueue(&crtc->base); DEFINE_WAIT(wait); WARN_ON(!drm_modeset_is_locked(&crtc->base.mutex)); @@ -81,7 +82,7 @@ static bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl * other CPUs can see the task state update by the time we * read the scanline. 
*/ - prepare_to_wait(&crtc->vbl_wait, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); scanline = intel_get_crtc_scanline(crtc); if (scanline < min || scanline > max) @@ -100,7 +101,7 @@ static bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl local_irq_disable(); } - finish_wait(&crtc->vbl_wait, &wait); + finish_wait(wq, &wait); drm_vblank_put(dev, pipe); -- GitLab From 4811ff4f2388727a161ea49c2b0ddca95e44c7f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:07 +0300 Subject: [PATCH 0027/1868] drm/i915: Add chv_power_wells[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add chv_power_wells[] so we can start to build up the power well support for chv. Just the "always on" well there initially. Signed-off-by: Ville Syrjälä Tested-by: Rafael Barbalho Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 615e341682c3..7dbd7892b968 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6574,6 +6574,15 @@ static struct i915_power_well vlv_power_wells[] = { }, }; +static struct i915_power_well chv_power_wells[] = { + { + .name = "always-on", + .always_on = 1, + .domains = VLV_ALWAYS_ON_POWER_DOMAINS, + .ops = &i9xx_always_on_power_well_ops, + }, +}; + static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, enum punit_power_well power_well_id) { @@ -6610,6 +6619,8 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) } else if (IS_BROADWELL(dev_priv->dev)) { set_power_wells(power_domains, bdw_power_wells); hsw_pwr = power_domains; + } else if (IS_CHERRYVIEW(dev_priv->dev)) { + set_power_wells(power_domains, chv_power_wells); } else if (IS_VALLEYVIEW(dev_priv->dev)) { set_power_wells(power_domains,
vlv_power_wells); } else { -- GitLab From 5d6f7ea752228788eddce0b9e268fa1f0eabdd7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:08 +0300 Subject: [PATCH 0028/1868] drm/i915: Add chv cmnlane power wells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CHV has two display PHYs so there are also two cmnlane power wells. Add the appropriate code to power the wells up/down. Like on VLV we do the cmnreset assert/deassert and the DPLL refclock enabling at appropriate times. This code actually works on my bsw. Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 89 +++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e4d7607da2c4..c3338ca4ab17 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -514,6 +514,7 @@ enum punit_power_well { PUNIT_POWER_WELL_DPIO_TX_C_LANES_23 = 9, PUNIT_POWER_WELL_DPIO_RX0 = 10, PUNIT_POWER_WELL_DPIO_RX1 = 11, + PUNIT_POWER_WELL_DPIO_CMN_D = 12, PUNIT_POWER_WELL_NUM, }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7dbd7892b968..8a78015dd51e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6254,6 +6254,64 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, false); } +static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + enum dpio_phy phy; + + WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC && + power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D); + + /* + * Enable the CRI clock source so we can get at the + * display and the reference clock for VGA + * hotplug / manual detection.
+ */ + if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { + phy = DPIO_PHY0; + I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | + DPLL_REFA_CLK_ENABLE_VLV); + I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | + DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); + } else { + phy = DPIO_PHY1; + I915_WRITE(DPLL(PIPE_C), I915_READ(DPLL(PIPE_C)) | + DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); + } + udelay(1); /* >10ns for cmnreset, >0ns for sidereset */ + vlv_set_power_well(dev_priv, power_well, true); + + /* Poll for phypwrgood signal */ + if (wait_for(I915_READ(DISPLAY_PHY_STATUS) & PHY_POWERGOOD(phy), 1)) + DRM_ERROR("Display PHY %d is not power up\n", phy); + + I915_WRITE(DISPLAY_PHY_CONTROL, + PHY_COM_LANE_RESET_DEASSERT(phy, I915_READ(DISPLAY_PHY_CONTROL))); +} + +static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + enum dpio_phy phy; + + WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC && + power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D); + + if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { + phy = DPIO_PHY0; + assert_pll_disabled(dev_priv, PIPE_A); + assert_pll_disabled(dev_priv, PIPE_B); + } else { + phy = DPIO_PHY1; + assert_pll_disabled(dev_priv, PIPE_C); + } + + I915_WRITE(DISPLAY_PHY_CONTROL, + PHY_COM_LANE_RESET_ASSERT(phy, I915_READ(DISPLAY_PHY_CONTROL))); + + vlv_set_power_well(dev_priv, power_well, false); +} + static void check_power_well_state(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -6445,6 +6503,18 @@ EXPORT_SYMBOL_GPL(i915_get_cdclk_freq); BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ BIT(POWER_DOMAIN_INIT)) +#define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ + BIT(POWER_DOMAIN_INIT)) + +#define CHV_DPIO_CMN_D_POWER_DOMAINS ( \ + 
BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ + BIT(POWER_DOMAIN_INIT)) + static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_always_on_power_well_noop, .enable = i9xx_always_on_power_well_noop, @@ -6452,6 +6522,13 @@ static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .is_enabled = i9xx_always_on_power_well_enabled, }; +static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = { + .sync_hw = vlv_power_well_sync_hw, + .enable = chv_dpio_cmn_power_well_enable, + .disable = chv_dpio_cmn_power_well_disable, + .is_enabled = vlv_power_well_enabled, +}; + static struct i915_power_well i9xx_always_on_power_well[] = { { .name = "always-on", @@ -6581,6 +6658,18 @@ static struct i915_power_well chv_power_wells[] = { .domains = VLV_ALWAYS_ON_POWER_DOMAINS, .ops = &i9xx_always_on_power_well_ops, }, + { + .name = "dpio-common-bc", + .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS, + .data = PUNIT_POWER_WELL_DPIO_CMN_BC, + .ops = &chv_dpio_cmn_power_well_ops, + }, + { + .name = "dpio-common-d", + .domains = CHV_DPIO_CMN_D_POWER_DOMAINS, + .data = PUNIT_POWER_WELL_DPIO_CMN_D, + .ops = &chv_dpio_cmn_power_well_ops, + }, }; static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, -- GitLab From a74d782c74644b2e50b3db61e115831cdc3e9010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:09 +0300 Subject: [PATCH 0029/1868] drm/i915: Kill intel_reset_dpio() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both VLV and CHV handle the cmnreset stuff in the power well code now, so intel_reset_dpio() is no longer needed. 
Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 31 ---------------------------- 1 file changed, 31 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a9b351d1ff88..c0575ea1e196 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1513,34 +1513,6 @@ static void intel_init_dpio(struct drm_device *dev) } } -static void intel_reset_dpio(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (IS_CHERRYVIEW(dev)) { - enum dpio_phy phy; - u32 val; - - for (phy = DPIO_PHY0; phy < I915_NUM_PHYS_VLV; phy++) { - /* Poll for phypwrgood signal */ - if (wait_for(I915_READ(DISPLAY_PHY_STATUS) & - PHY_POWERGOOD(phy), 1)) - DRM_ERROR("Display PHY %d is not power up\n", phy); - - /* - * Deassert common lane reset for PHY. - * - * This should only be done on init and resume from S3 - * with both PLLs disabled, or we risk losing DPIO and - * PLL synchronization. - */ - val = I915_READ(DISPLAY_PHY_CONTROL); - I915_WRITE(DISPLAY_PHY_CONTROL, - PHY_COM_LANE_RESET_DEASSERT(phy, val)); - } - } -} - static void vlv_enable_pll(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; @@ -12615,8 +12587,6 @@ void intel_modeset_init_hw(struct drm_device *dev) intel_init_clock_gating(dev); - intel_reset_dpio(dev); - intel_enable_gt_powersave(dev); } @@ -12687,7 +12657,6 @@ void intel_modeset_init(struct drm_device *dev) } intel_init_dpio(dev); - intel_reset_dpio(dev); intel_shared_dpll_init(dev); -- GitLab From f07057d13c62c5b925725c6e03a0c4d1c0244bef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:10 +0300 Subject: [PATCH 0030/1868] drm/i915: Add disp2d power well for chv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not sure if it's still there since chv has per-pipe power wells. 
At least with current Punit this doesn't work. Also the display irq handling would need to be adjusted for pipe C. So leave the code iffed out for now. Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8a78015dd51e..dc8719f29d03 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6658,6 +6658,14 @@ static struct i915_power_well chv_power_wells[] = { .domains = VLV_ALWAYS_ON_POWER_DOMAINS, .ops = &i9xx_always_on_power_well_ops, }, +#if 0 + { + .name = "display", + .domains = VLV_DISPLAY_POWER_DOMAINS, + .data = PUNIT_POWER_WELL_DISP2D, + .ops = &vlv_display_power_well_ops, + }, +#endif { .name = "dpio-common-bc", .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS, -- GitLab From 26972b0a80091ccece1cbd9422772ae625a612f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:11 +0300 Subject: [PATCH 0031/1868] drm/i915: Add per-pipe power wells for chv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CHV has a power well for each pipe. Add the code to deal with them. The Punit in current hardware doesn't seem ready for this yet, so leave it iffed out. 
Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 12 +++ drivers/gpu/drm/i915/intel_pm.c | 126 ++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c3338ca4ab17..9d54aee6f8c8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -501,6 +501,18 @@ #define DSPFREQSTAT_MASK (0x3 << DSPFREQSTAT_SHIFT) #define DSPFREQGUAR_SHIFT 14 #define DSPFREQGUAR_MASK (0x3 << DSPFREQGUAR_SHIFT) +#define _DP_SSC(val, pipe) ((val) << (2 * (pipe))) +#define DP_SSC_MASK(pipe) _DP_SSC(0x3, (pipe)) +#define DP_SSC_PWR_ON(pipe) _DP_SSC(0x0, (pipe)) +#define DP_SSC_CLK_GATE(pipe) _DP_SSC(0x1, (pipe)) +#define DP_SSC_RESET(pipe) _DP_SSC(0x2, (pipe)) +#define DP_SSC_PWR_GATE(pipe) _DP_SSC(0x3, (pipe)) +#define _DP_SSS(val, pipe) ((val) << (2 * (pipe) + 16)) +#define DP_SSS_MASK(pipe) _DP_SSS(0x3, (pipe)) +#define DP_SSS_PWR_ON(pipe) _DP_SSS(0x0, (pipe)) +#define DP_SSS_CLK_GATE(pipe) _DP_SSS(0x1, (pipe)) +#define DP_SSS_RESET(pipe) _DP_SSS(0x2, (pipe)) +#define DP_SSS_PWR_GATE(pipe) _DP_SSS(0x3, (pipe)) /* See the PUNIT HAS v0.8 for the below bits */ enum punit_power_well { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index dc8719f29d03..95b3ca5964e9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6312,6 +6312,95 @@ static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, false); } +static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + enum pipe pipe = power_well->data; + bool enabled; + u32 state, ctrl; + + mutex_lock(&dev_priv->rps.hw_lock); + + state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe); + /* + * We only ever set the power-on and power-gate states, anything + * else 
is unexpected. + */ + WARN_ON(state != DP_SSS_PWR_ON(pipe) && state != DP_SSS_PWR_GATE(pipe)); + enabled = state == DP_SSS_PWR_ON(pipe); + + /* + * A transient state at this point would mean some unexpected party + * is poking at the power controls too. + */ + ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe); + WARN_ON(ctrl << 16 != state); + + mutex_unlock(&dev_priv->rps.hw_lock); + + return enabled; +} + +static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well, + bool enable) +{ + enum pipe pipe = power_well->data; + u32 state; + u32 ctrl; + + state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); + + mutex_lock(&dev_priv->rps.hw_lock); + +#define COND \ + ((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state) + + if (COND) + goto out; + + ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + ctrl &= ~DP_SSC_MASK(pipe); + ctrl |= enable ? DP_SSC_PWR_ON(pipe) : DP_SSC_PWR_GATE(pipe); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, ctrl); + + if (wait_for(COND, 100)) + DRM_ERROR("timout setting power well state %08x (%08x)\n", + state, + vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ)); + +#undef COND + +out: + mutex_unlock(&dev_priv->rps.hw_lock); +} + +static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0); +} + +static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + WARN_ON_ONCE(power_well->data != PIPE_A && + power_well->data != PIPE_B && + power_well->data != PIPE_C); + + chv_set_pipe_power_well(dev_priv, power_well, true); +} + +static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + WARN_ON_ONCE(power_well->data != PIPE_A && + power_well->data != PIPE_B && + power_well->data != PIPE_C); + + 
chv_set_pipe_power_well(dev_priv, power_well, false); +} + static void check_power_well_state(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -6503,6 +6592,18 @@ EXPORT_SYMBOL_GPL(i915_get_cdclk_freq); BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ BIT(POWER_DOMAIN_INIT)) +#define CHV_PIPE_A_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PIPE_A) | \ + BIT(POWER_DOMAIN_INIT)) + +#define CHV_PIPE_B_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PIPE_B) | \ + BIT(POWER_DOMAIN_INIT)) + +#define CHV_PIPE_C_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PIPE_C) | \ + BIT(POWER_DOMAIN_INIT)) + #define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \ BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ @@ -6522,6 +6623,13 @@ static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .is_enabled = i9xx_always_on_power_well_enabled, }; +static const struct i915_power_well_ops chv_pipe_power_well_ops = { + .sync_hw = chv_pipe_power_well_sync_hw, + .enable = chv_pipe_power_well_enable, + .disable = chv_pipe_power_well_disable, + .is_enabled = chv_pipe_power_well_enabled, +}; + static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = { .sync_hw = vlv_power_well_sync_hw, .enable = chv_dpio_cmn_power_well_enable, @@ -6665,6 +6773,24 @@ static struct i915_power_well chv_power_wells[] = { .data = PUNIT_POWER_WELL_DISP2D, .ops = &vlv_display_power_well_ops, }, + { + .name = "pipe-a", + .domains = CHV_PIPE_A_POWER_DOMAINS, + .data = PIPE_A, + .ops = &chv_pipe_power_well_ops, + }, + { + .name = "pipe-b", + .domains = CHV_PIPE_B_POWER_DOMAINS, + .data = PIPE_B, + .ops = &chv_pipe_power_well_ops, + }, + { + .name = "pipe-c", + .domains = CHV_PIPE_C_POWER_DOMAINS, + .data = PIPE_C, + .ops = &chv_pipe_power_well_ops, + }, #endif { .name = "dpio-common-bc", -- GitLab From 8258356537871cf579868002552a0ed1762b0487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:12 +0300 Subject: [PATCH 0032/1868] drm/i915: 
Add chv port B and C TX wells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the TX wells for ports B and C just like on VLV. Again Punit doesn't seem ready (or the wells don't even exist anymore) so leave it iffed out. Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 95b3ca5964e9..59157635d418 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6804,6 +6804,36 @@ static struct i915_power_well chv_power_wells[] = { .data = PUNIT_POWER_WELL_DPIO_CMN_D, .ops = &chv_dpio_cmn_power_well_ops, }, +#if 0 + { + .name = "dpio-tx-b-01", + .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01, + }, + { + .name = "dpio-tx-b-23", + .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23, + }, + { + .name = "dpio-tx-c-01", + .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01, + }, + { + .name = "dpio-tx-c-23", + .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23, + }, +#endif }; static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, -- GitLab From 2ce147f36dc5a1f3b49abd8ce3164ad0f04ec863 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:13 +0300 Subject: [PATCH 0033/1868] drm/i915: Add chv port D TX wells MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the TX wells for port D. The Punit subsystem numbers are a total guess at this time. Also I'm not sure these even exist. Certainly the Punit in current hardware doesn't deal with these. Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ drivers/gpu/drm/i915/intel_pm.c | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9d54aee6f8c8..e01a1a0b9613 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -527,6 +527,10 @@ enum punit_power_well { PUNIT_POWER_WELL_DPIO_RX0 = 10, PUNIT_POWER_WELL_DPIO_RX1 = 11, PUNIT_POWER_WELL_DPIO_CMN_D = 12, + /* FIXME: guesswork below */ + PUNIT_POWER_WELL_DPIO_TX_D_LANES_01 = 13, + PUNIT_POWER_WELL_DPIO_TX_D_LANES_23 = 14, + PUNIT_POWER_WELL_DPIO_RX2 = 15, PUNIT_POWER_WELL_NUM, }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 59157635d418..f4a1837c231c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6616,6 +6616,15 @@ EXPORT_SYMBOL_GPL(i915_get_cdclk_freq); BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ BIT(POWER_DOMAIN_INIT)) +#define CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ + BIT(POWER_DOMAIN_INIT)) + +#define CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ + BIT(POWER_DOMAIN_INIT)) + static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_always_on_power_well_noop, .enable = i9xx_always_on_power_well_noop, @@ -6833,6 +6842,20 @@ static struct i915_power_well chv_power_wells[] = { .ops = &vlv_dpio_power_well_ops, .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23, }, + { + .name = "dpio-tx-d-01", + .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS | + 
CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_01, + }, + { + .name = "dpio-tx-d-23", + .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS | + CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_23, + }, #endif }; -- GitLab From 026b96e293fbe48153ae868308e341f226d76c46 Mon Sep 17 00:00:00 2001 From: Rafael Barbalho Date: Mon, 28 Jul 2014 19:56:27 +0100 Subject: [PATCH 0034/1868] drm/i915: Fix read back of plane stride register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the specifications bit 6 is actually valid in the stride register. Cc: Jesse Barnes Cc: Ville Syrjälä Signed-off-by: Rafael Barbalho Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c0575ea1e196..71957e7184ef 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6211,7 +6211,7 @@ static void i9xx_get_plane_config(struct intel_crtc *crtc, crtc->base.primary->fb->height = ((val >> 0) & 0xfff) + 1; val = I915_READ(DSPSTRIDE(pipe)); - crtc->base.primary->fb->pitches[0] = val & 0xffffff80; + crtc->base.primary->fb->pitches[0] = val & 0xffffffc0; aligned_height = intel_align_height(dev, crtc->base.primary->fb->height, plane_config->tiled); @@ -7247,7 +7247,7 @@ static void ironlake_get_plane_config(struct intel_crtc *crtc, crtc->base.primary->fb->height = ((val >> 0) & 0xfff) + 1; val = I915_READ(DSPSTRIDE(pipe)); - crtc->base.primary->fb->pitches[0] = val & 0xffffff80; + crtc->base.primary->fb->pitches[0] = val & 0xffffffc0; aligned_height = intel_align_height(dev, crtc->base.primary->fb->height, plane_config->tiled); -- GitLab From a5043453aa2412ece984373294529d177324c901 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:18 +0300 Subject: [PATCH 0035/1868] drm/i915: Split a few long debug prints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split some WM debug prints to multiple lines. This shouldn't hurt greppability since the important part is at the start and the rest is just repeated stuff for each pipe. Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f4a1837c231c..a318cd5ad8ed 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1387,7 +1387,8 @@ static void valleyview_update_wm(struct drm_crtc *crtc) plane_sr = cursor_sr = 0; } - DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", + DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " + "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", planea_wm, cursora_wm, planeb_wm, cursorb_wm, plane_sr, cursor_sr); @@ -1443,7 +1444,8 @@ static void g4x_update_wm(struct drm_crtc *crtc) plane_sr = cursor_sr = 0; } - DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", + DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " + "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", planea_wm, cursora_wm, planeb_wm, cursorb_wm, plane_sr, cursor_sr); -- GitLab From aad3d14d25c33c8e510c41aaaf2668e8d32811ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:25 +0300 Subject: [PATCH 0036/1868] drm/i915: Add DP training pattern 3 for CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CHV supports DP training pattern 3.
Add the required stuff. Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_dp.c | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e01a1a0b9613..e355fc8d4d59 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3489,6 +3489,8 @@ enum punit_power_well { #define DP_LINK_TRAIN_OFF (3 << 28) #define DP_LINK_TRAIN_MASK (3 << 28) #define DP_LINK_TRAIN_SHIFT 28 +#define DP_LINK_TRAIN_PAT_3_CHV (1 << 14) +#define DP_LINK_TRAIN_MASK_CHV ((3 << 28)|(1<<14)) /* CPT Link training mode */ #define DP_LINK_TRAIN_PAT_1_CPT (0 << 8) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 3ea5cef9bbe6..0aa219dff19c 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2959,7 +2959,10 @@ intel_dp_set_link_train(struct intel_dp *intel_dp, } } else { - *DP &= ~DP_LINK_TRAIN_MASK; + if (IS_CHERRYVIEW(dev)) + *DP &= ~DP_LINK_TRAIN_MASK_CHV; + else + *DP &= ~DP_LINK_TRAIN_MASK; switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) { case DP_TRAINING_PATTERN_DISABLE: @@ -2972,8 +2975,12 @@ intel_dp_set_link_train(struct intel_dp *intel_dp, *DP |= DP_LINK_TRAIN_PAT_2; break; case DP_TRAINING_PATTERN_3: - DRM_ERROR("DP training pattern 3 not supported\n"); - *DP |= DP_LINK_TRAIN_PAT_2; + if (IS_CHERRYVIEW(dev)) { + *DP |= DP_LINK_TRAIN_PAT_3_CHV; + } else { + DRM_ERROR("DP training pattern 3 not supported\n"); + *DP |= DP_LINK_TRAIN_PAT_2; + } break; } } @@ -3260,7 +3267,10 @@ intel_dp_link_down(struct intel_dp *intel_dp) DP &= ~DP_LINK_TRAIN_MASK_CPT; I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE_CPT); } else { - DP &= ~DP_LINK_TRAIN_MASK; + if (IS_CHERRYVIEW(dev)) + DP &= ~DP_LINK_TRAIN_MASK_CHV; + else + DP &= ~DP_LINK_TRAIN_MASK; I915_WRITE(intel_dp->output_reg, DP | 
DP_LINK_TRAIN_PAT_IDLE); } POSTING_READ(intel_dp->output_reg); -- GitLab From dcfc3552136fb6996e19b9f6980dc5a6721defd5 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:16 +0100 Subject: [PATCH 0037/1868] drm/i915: Specify when the PLL hw state fields are valid Not all those fields are valid on a given platform. Make it explicit. Unions could also be used, but were cluttering some code paths with if/else ladders. v2: Don't use anonymous unions (Daniel) Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c70b2b67282e..c3beb08813c6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -197,10 +197,13 @@ enum intel_dpll_id { #define I915_NUM_PLLS 2 struct intel_dpll_hw_state { + /* i9xx, pch plls */ uint32_t dpll; uint32_t dpll_md; uint32_t fp0; uint32_t fp1; + + /* hsw, bdw */ uint32_t wrpll; }; -- GitLab From 74dd69280bc3f3e84d46b2a0f78901a0d9b4562c Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:17 +0100 Subject: [PATCH 0038/1868] drm/i915: Add a space to the shared DPLL debug message Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 71957e7184ef..0b8769b9422f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1778,7 +1778,7 @@ static void intel_enable_shared_dpll(struct intel_crtc *crtc) if (WARN_ON(pll->refcount == 0)) return; - DRM_DEBUG_KMS("enable %s (active %d, on? %d)for crtc %d\n", + DRM_DEBUG_KMS("enable %s (active %d, on? 
%d) for crtc %d\n", pll->name, pll->active, pll->on, crtc->base.base.id); -- GitLab From 7d2c81751c858442387fa5158d4cd80c2190d739 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:18 +0100 Subject: [PATCH 0039/1868] drm/i915: Extract the HSW DDI selection code into its own function Future platform will slightly change that. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0b8769b9422f..c3bb5f7fd21b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7607,6 +7607,22 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc, return 0; } +static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, + enum port port, + struct intel_crtc_config *pipe_config) +{ + pipe_config->ddi_pll_sel = I915_READ(PORT_CLK_SEL(port)); + + switch (pipe_config->ddi_pll_sel) { + case PORT_CLK_SEL_WRPLL1: + pipe_config->shared_dpll = DPLL_ID_WRPLL1; + break; + case PORT_CLK_SEL_WRPLL2: + pipe_config->shared_dpll = DPLL_ID_WRPLL2; + break; + } +} + static void haswell_get_ddi_port_state(struct intel_crtc *crtc, struct intel_crtc_config *pipe_config) { @@ -7620,16 +7636,7 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, port = (tmp & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT; - pipe_config->ddi_pll_sel = I915_READ(PORT_CLK_SEL(port)); - - switch (pipe_config->ddi_pll_sel) { - case PORT_CLK_SEL_WRPLL1: - pipe_config->shared_dpll = DPLL_ID_WRPLL1; - break; - case PORT_CLK_SEL_WRPLL2: - pipe_config->shared_dpll = DPLL_ID_WRPLL2; - break; - } + haswell_get_ddi_pll(dev_priv, port, pipe_config); if (pipe_config->shared_dpll >= 0) { pll = &dev_priv->shared_dplls[pipe_config->shared_dpll]; -- GitLab From 143b307c43dcfeae41a3b6c24a29fae4c70884f4 Mon Sep 17 00:00:00 2001 From: 
Damien Lespiau Date: Tue, 29 Jul 2014 18:06:19 +0100 Subject: [PATCH 0040/1868] drm/i915: Extract the HSW/BDW shared dpll init code So we can easily provide an alternate implementation in the future. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 5db0b5552e39..ee7a74c6e93a 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1248,10 +1248,8 @@ static const char * const hsw_ddi_pll_names[] = { "WRPLL 2", }; -void intel_ddi_pll_init(struct drm_device *dev) +static void hsw_shared_dplls_init(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t val = I915_READ(LCPLL_CTL); int i; dev_priv->num_shared_dpll = 2; @@ -1264,6 +1262,14 @@ void intel_ddi_pll_init(struct drm_device *dev) dev_priv->shared_dplls[i].get_hw_state = hsw_ddi_pll_get_hw_state; } +} + +void intel_ddi_pll_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t val = I915_READ(LCPLL_CTL); + + hsw_shared_dplls_init(dev_priv); /* The LCPLL register should be turned on by the BIOS. For now let's * just check its state and print errors in case something is wrong. -- GitLab From ea155f32cea99f17371bec00ee9c8e3713a15d4f Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:20 +0100 Subject: [PATCH 0041/1868] drm/i915: Restrict hsw_dp_set_ddi_pll_sel() to HSW/BDW Future platform will use config->ddi_pll_sel in a different way. 
Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0aa219dff19c..16fcfc67f014 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -964,7 +964,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, &pipe_config->dp_m2_n2); } - if (HAS_DDI(dev)) + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) hsw_dp_set_ddi_pll_sel(pipe_config, intel_dp->link_bw); else intel_dp_set_clock(encoder, pipe_config, intel_dp->link_bw); -- GitLab From bf9584bd0e99bd284e115ea8eba9b02a5a2d7b4d Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:21 +0100 Subject: [PATCH 0042/1868] drm/i915: Fix stale comment for intel_ddi_pll_select() Since the run-time PM on DPMS series, this function has an outdated comment. Refresh it a bit. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ee7a74c6e93a..3b6375d3e422 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -709,10 +709,11 @@ intel_ddi_calculate_wrpll(int clock /* in Hz */, } /* - * Tries to find a PLL for the CRTC. If it finds, it increases the refcount and - * stores it in intel_crtc->ddi_pll_sel, so other mode sets won't be able to - * steal the selected PLL. You need to call intel_ddi_pll_enable to actually - * enable the PLL. + * Tries to find a *shared* PLL for the CRTC and store it in + * intel_crtc->ddi_pll_sel. + * + * For private DPLLs, compute_config() should do the selection for us. This + * function should be folded into compute_config() eventually. 
*/ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc) { -- GitLab From 0220ab6e00785da008bb3736737b877d45858608 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:22 +0100 Subject: [PATCH 0043/1868] drm/i915: Split the BDW/HSW specific shared pll selection We'll need a different algorithm to select the shared DPLL. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 39 +++++++++++++++++++------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 3b6375d3e422..09599851f1a8 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -708,23 +708,10 @@ intel_ddi_calculate_wrpll(int clock /* in Hz */, *r2_out = best.r2; } -/* - * Tries to find a *shared* PLL for the CRTC and store it in - * intel_crtc->ddi_pll_sel. - * - * For private DPLLs, compute_config() should do the selection for us. This - * function should be folded into compute_config() eventually. - */ -bool intel_ddi_pll_select(struct intel_crtc *intel_crtc) +static bool +hsw_ddi_pll_select(struct intel_crtc *intel_crtc, int output, int clock) { - struct drm_crtc *crtc = &intel_crtc->base; - struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - int type = intel_encoder->type; - int clock = intel_crtc->config.port_clock; - - intel_put_shared_dpll(intel_crtc); - - if (type == INTEL_OUTPUT_HDMI) { + if (output == INTEL_OUTPUT_HDMI) { struct intel_shared_dpll *pll; uint32_t val; unsigned p, n2, r2; @@ -750,6 +737,26 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc) return true; } + +/* + * Tries to find a *shared* PLL for the CRTC and store it in + * intel_crtc->ddi_pll_sel. + * + * For private DPLLs, compute_config() should do the selection for us. This + * function should be folded into compute_config() eventually. 
+ */ +bool intel_ddi_pll_select(struct intel_crtc *intel_crtc) +{ + struct drm_crtc *crtc = &intel_crtc->base; + struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); + int type = intel_encoder->type; + int clock = intel_crtc->config.port_clock; + + intel_put_shared_dpll(intel_crtc); + + return hsw_ddi_pll_select(intel_crtc, type, clock); +} + void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = crtc->dev->dev_private; -- GitLab From d664c0cece2dd410d8134aa820112e471e3592dd Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:23 +0100 Subject: [PATCH 0044/1868] drm/i915: Make intel_ddi_calculate_wrpll() HSW/BDW specific Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 09599851f1a8..eb8e494ce569 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -644,8 +644,8 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, } static void -intel_ddi_calculate_wrpll(int clock /* in Hz */, - unsigned *r2_out, unsigned *n2_out, unsigned *p_out) +hsw_ddi_calculate_wrpll(int clock /* in Hz */, + unsigned *r2_out, unsigned *n2_out, unsigned *p_out) { uint64_t freq2k; unsigned p, n2, r2; @@ -709,14 +709,16 @@ intel_ddi_calculate_wrpll(int clock /* in Hz */, } static bool -hsw_ddi_pll_select(struct intel_crtc *intel_crtc, int output, int clock) +hsw_ddi_pll_select(struct intel_crtc *intel_crtc, + struct intel_encoder *intel_encoder, + int clock) { - if (output == INTEL_OUTPUT_HDMI) { + if (intel_encoder->type == INTEL_OUTPUT_HDMI) { struct intel_shared_dpll *pll; uint32_t val; unsigned p, n2, r2; - intel_ddi_calculate_wrpll(clock * 1000, &r2, &n2, &p); + hsw_ddi_calculate_wrpll(clock * 1000, &r2, &n2, &p); val = WRPLL_PLL_ENABLE | WRPLL_PLL_LCPLL | 
WRPLL_DIVIDER_REFERENCE(r2) | WRPLL_DIVIDER_FEEDBACK(n2) | @@ -749,12 +751,11 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc) { struct drm_crtc *crtc = &intel_crtc->base; struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - int type = intel_encoder->type; int clock = intel_crtc->config.port_clock; intel_put_shared_dpll(intel_crtc); - return hsw_ddi_pll_select(intel_crtc, type, clock); + return hsw_ddi_pll_select(intel_crtc, intel_encoder, clock); } void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) -- GitLab From ad13d6048f5002f1c5ab21c71a5ee136a2d8e889 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:24 +0100 Subject: [PATCH 0045/1868] drm/i915: Split the CDCLK retrieval per-platform This is only going to get worse, so split it now to avoid adding more cases to the if/else ladder. Suggested-by: Daniel Vetter Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 55 ++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index eb8e494ce569..b5870fd920ff 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1192,31 +1192,52 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder) } } -int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv) +static int bdw_get_cdclk_freq(struct drm_i915_private *dev_priv) +{ + uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + return 800000; + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + return 450000; + else if (freq == LCPLL_CLK_FREQ_450) + return 450000; + else if (freq == LCPLL_CLK_FREQ_54O_BDW) + return 540000; + else if (freq == LCPLL_CLK_FREQ_337_5_BDW) + return 337500; + else + return 675000; +} + +static int hsw_get_cdclk_freq(struct drm_i915_private *dev_priv) { struct 
drm_device *dev = dev_priv->dev; uint32_t lcpll = I915_READ(LCPLL_CTL); uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - if (lcpll & LCPLL_CD_SOURCE_FCLK) { + if (lcpll & LCPLL_CD_SOURCE_FCLK) return 800000; - } else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) { + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) return 450000; - } else if (freq == LCPLL_CLK_FREQ_450) { + else if (freq == LCPLL_CLK_FREQ_450) return 450000; - } else if (IS_HASWELL(dev)) { - if (IS_ULT(dev)) - return 337500; - else - return 540000; - } else { - if (freq == LCPLL_CLK_FREQ_54O_BDW) - return 540000; - else if (freq == LCPLL_CLK_FREQ_337_5_BDW) - return 337500; - else - return 675000; - } + else if (IS_ULT(dev)) + return 337500; + else + return 540000; +} + +int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + + if (IS_BROADWELL(dev)) + return bdw_get_cdclk_freq(dev_priv); + + /* Haswell */ + return hsw_get_cdclk_freq(dev_priv); } static void hsw_ddi_pll_enable(struct drm_i915_private *dev_priv, -- GitLab From 3d51278af91f8e96077dad3a4c1cc0b19fa8ca25 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 29 Jul 2014 20:57:08 +0200 Subject: [PATCH 0046/1868] drm/i915: Make ddi_clock_gate() HSW/BDW specific Turns out we were again way too naive and optimistic, of course things will change. 
Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index b5870fd920ff..3634575534b4 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -587,8 +587,8 @@ static int intel_ddi_calc_wrpll_link(struct drm_i915_private *dev_priv, return (refclk * n * 100) / (p * r); } -void intel_ddi_clock_get(struct intel_encoder *encoder, - struct intel_crtc_config *pipe_config) +static void hsw_ddi_clock_get(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config) { struct drm_i915_private *dev_priv = encoder->base.dev->dev_private; int link_clock = 0; @@ -643,6 +643,12 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, pipe_config->adjusted_mode.crtc_clock = pipe_config->port_clock; } +void intel_ddi_clock_get(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config) +{ + hsw_ddi_clock_get(encoder, pipe_config); +} + static void hsw_ddi_calculate_wrpll(int clock /* in Hz */, unsigned *r2_out, unsigned *n2_out, unsigned *p_out) @@ -1480,7 +1486,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder, dev_priv->vbt.edp_bpp = pipe_config->pipe_bpp; } - intel_ddi_clock_get(encoder, pipe_config); + hsw_ddi_clock_get(encoder, pipe_config); } static void intel_ddi_destroy(struct drm_encoder *encoder) -- GitLab From 06ffc7789e76a095e85814dbcf7b660344f6b679 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 17 Jul 2014 17:43:46 -0300 Subject: [PATCH 0047/1868] drm/i915: freeze display before the interrupts and GT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we started using intel_runtime_pm_disable_interrupts() at normal (non-runtime) suspend/resume, we had to remove a WARN from ironlake_disable_display_irq to avoid a case where we were doing the correct thing and the
WARN was not really needed. The problem is that the WARN was useful in other cases, and its removal can hide some bugs that we would catch automatically. To be able to add back the WARN, we have to call intel_crtc_control() before interrupts are disabled, which is what this patch currently does. Also notice that Ville's patch from the Watermarks series "drm/i915: Leave interrupts enabled while disabling crtcs during suspend" also did a change that's equivalent to the one we're doing on this patch, with the exception that its original patch, when applied to the current tree, produces a WARN. Related commits: commit daa390e5ee45cc051d6bf37b296901f2f92b002d Author: Jesse Barnes drm/i915: don't warn if IRQs are disabled when shutting down display IRQs commit e11aa362308f5de467ce355a2a2471321b15a35c Author: Jesse Barnes drm/i915: use runtime irq suspend/resume in freeze/thaw Note that the function part of this patch has already been done in commit 0e32b39ceed665bfa4a77a4bc307b6652b991632 Author: Dave Airlie Date: Fri May 2 14:02:48 2014 +1000 drm/i915: add DP 1.2 MST support (v0.7) with the fixup commit 09b64267c1f72f2670fcde9f11e5453ce365ca23 Author: Dave Airlie Date: Wed Jul 23 14:25:24 2014 +1000 drm/i915: don't suspend gt until after we disable irqs and display (v2) so all that's left from Paulo's patch is reinstating the WARNING. Cc: Ville Syrjälä Cc: Jesse Barnes Signed-off-by: Paulo Zanoni Reviewed-by: Jesse Barnes [danvet: Explain conflict resolution with Dave's DP MST patches with a note in the commit message.]
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0e44c433cfc3..379cfb5dc731 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -151,7 +151,7 @@ ironlake_disable_display_irq(struct drm_i915_private *dev_priv, u32 mask) { assert_spin_locked(&dev_priv->irq_lock); - if (!intel_irqs_enabled(dev_priv)) + if (WARN_ON(!intel_irqs_enabled(dev_priv))) return; if ((dev_priv->irq_mask & mask) != mask) { -- GitLab From 383c5a6a4682f6816fb5a07aebd89c5813c3d1c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:03:57 +0300 Subject: [PATCH 0048/1868] drm/i915: Add cdclk change support for chv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like the Punit is supposed to support the 400MHz cdclk directly on chv, so we don't need the vlv tricks. 
FIXME: Punit doesn't seem ready for this yet on current hw Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 4 +++ drivers/gpu/drm/i915/intel_display.c | 50 ++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e355fc8d4d59..697c04976cd3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -497,6 +497,10 @@ #define BUNIT_REG_BISOC 0x11 #define PUNIT_REG_DSPFREQ 0x36 +#define DSPFREQSTAT_SHIFT_CHV 24 +#define DSPFREQSTAT_MASK_CHV (0x1f << DSPFREQSTAT_SHIFT_CHV) +#define DSPFREQGUAR_SHIFT_CHV 8 +#define DSPFREQGUAR_MASK_CHV (0x1f << DSPFREQGUAR_SHIFT_CHV) #define DSPFREQSTAT_SHIFT 30 #define DSPFREQSTAT_MASK (0x3 << DSPFREQSTAT_SHIFT) #define DSPFREQGUAR_SHIFT 14 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c3bb5f7fd21b..fdf5ec88866a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4501,6 +4501,47 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) vlv_update_cdclk(dev); } +static void cherryview_set_cdclk(struct drm_device *dev, int cdclk) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 val, cmd; + + WARN_ON(dev_priv->display.get_display_clock_speed(dev) != dev_priv->vlv_cdclk_freq); + + switch (cdclk) { + case 400000: + cmd = 3; + break; + case 333333: + case 320000: + cmd = 2; + break; + case 266667: + cmd = 1; + break; + case 200000: + cmd = 0; + break; + default: + WARN_ON(1); + return; + } + + mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + val &= ~DSPFREQGUAR_MASK_CHV; + val |= (cmd << DSPFREQGUAR_SHIFT_CHV); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & + DSPFREQSTAT_MASK_CHV) == (cmd << 
DSPFREQSTAT_SHIFT_CHV), + 50)) { + DRM_ERROR("timed out waiting for CDclk change\n"); + } + mutex_unlock(&dev_priv->rps.hw_lock); + + vlv_update_cdclk(dev); +} + static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv, int max_pixclk) { @@ -4569,8 +4610,13 @@ static void valleyview_modeset_global_resources(struct drm_device *dev) int max_pixclk = intel_mode_max_pixclk(dev_priv); int req_cdclk = valleyview_calc_cdclk(dev_priv, max_pixclk); - if (req_cdclk != dev_priv->vlv_cdclk_freq) - valleyview_set_cdclk(dev, req_cdclk); + if (req_cdclk != dev_priv->vlv_cdclk_freq) { + if (IS_CHERRYVIEW(dev)) + cherryview_set_cdclk(dev, req_cdclk); + else + valleyview_set_cdclk(dev, req_cdclk); + } + modeset_update_crtc_power_domains(dev); } -- GitLab From d49a340d6eb6de45c1a886b71469d110f2dbb57b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:03:58 +0300 Subject: [PATCH 0049/1868] drm/i915: Disable cdclk changes for chv until Punit is ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Punit seems a bit WIP still. Disable cdclk changes until we have hardware where it works. Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fdf5ec88866a..a7d0c88a620d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4548,6 +4548,10 @@ static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv, int vco = valleyview_get_vco(dev_priv); int freq_320 = (vco << 1) % 320000 != 0 ? 
333333 : 320000; + /* FIXME: Punit isn't quite ready yet */ + if (IS_CHERRYVIEW(dev_priv->dev)) + return 400000; + /* * Really only a few cases to deal with, as only 4 CDclks are supported: * 200MHz @@ -5283,6 +5287,10 @@ static int valleyview_get_display_clock_speed(struct drm_device *dev) u32 val; int divider; + /* FIXME: Punit isn't quite ready yet */ + if (IS_CHERRYVIEW(dev)) + return 400000; + mutex_lock(&dev_priv->dpio_lock); val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); mutex_unlock(&dev_priv->dpio_lock); -- GitLab From d17ec4ced6c0907f80f51677a44236da94ecd92d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:03:59 +0300 Subject: [PATCH 0050/1868] drm/i915: Leave DPLL ref clocks on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We enable the DPLL refclock already when bringing up the cmnlane power well, so also leave it on when otherwise disabling the DPLL. Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a7d0c88a620d..6ca53b372a4c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1684,7 +1684,7 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) assert_pipe_disabled(dev_priv, pipe); /* Set PLL en = 0 */ - val = DPLL_SSC_REF_CLOCK_CHV; + val = DPLL_SSC_REF_CLOCK_CHV | DPLL_REFA_CLK_ENABLE_VLV; if (pipe != PIPE_A) val |= DPLL_INTEGRATED_CRI_CLK_VLV; I915_WRITE(DPLL(pipe), val); -- GitLab From 1ae0d1377fda91367b27596001c82e877ec2057e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:00 +0300 Subject: [PATCH 0051/1868] drm/i915: Split chv_update_pll() apart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Split chv_update_pll() into two parts ala: commit bdd4b6a655749970cc632aafc5fd596c07b60b1c Author: Daniel Vetter Date: Thu Apr 24 23:55:11 2014 +0200 drm/i915: Extract vlv_prepare_pll Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 30 ++++++++++++++++++---------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6ca53b372a4c..60ba6962026b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -100,6 +100,7 @@ static void ironlake_set_pipeconf(struct drm_crtc *crtc); static void haswell_set_pipeconf(struct drm_crtc *crtc); static void intel_set_pipe_csc(struct drm_crtc *crtc); static void vlv_prepare_pll(struct intel_crtc *crtc); +static void chv_prepare_pll(struct intel_crtc *crtc); static struct intel_encoder *intel_find_encoder(struct intel_connector *connector, int pipe) { @@ -4642,8 +4643,12 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) is_dsi = intel_pipe_has_type(crtc, INTEL_OUTPUT_DSI); - if (!is_dsi && !IS_CHERRYVIEW(dev)) - vlv_prepare_pll(intel_crtc); + if (!is_dsi) { + if (IS_CHERRYVIEW(dev)) + chv_prepare_pll(intel_crtc); + else + vlv_prepare_pll(intel_crtc); + } /* Set up the display plane register */ dspcntr = DISPPLANE_GAMMA_ENABLE; @@ -5691,6 +5696,18 @@ static void vlv_prepare_pll(struct intel_crtc *crtc) } static void chv_update_pll(struct intel_crtc *crtc) +{ + crtc->config.dpll_hw_state.dpll = DPLL_SSC_REF_CLOCK_CHV | + DPLL_REFA_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS | + DPLL_VCO_ENABLE; + if (crtc->pipe != PIPE_A) + crtc->config.dpll_hw_state.dpll |= DPLL_INTEGRATED_CRI_CLK_VLV; + + crtc->config.dpll_hw_state.dpll_md = + (crtc->config.pixel_multiplier - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT; +} + +static void chv_prepare_pll(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; 
struct drm_i915_private *dev_priv = dev->dev_private; @@ -5701,15 +5718,6 @@ static void chv_update_pll(struct intel_crtc *crtc) u32 bestn, bestm1, bestm2, bestp1, bestp2, bestm2_frac; int refclk; - crtc->config.dpll_hw_state.dpll = DPLL_SSC_REF_CLOCK_CHV | - DPLL_REFA_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS | - DPLL_VCO_ENABLE; - if (pipe != PIPE_A) - crtc->config.dpll_hw_state.dpll |= DPLL_INTEGRATED_CRI_CLK_VLV; - - crtc->config.dpll_hw_state.dpll_md = - (crtc->config.pixel_multiplier - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT; - bestn = crtc->config.dpll.n; bestm2_frac = crtc->config.dpll.m2 & 0x3fffff; bestm1 = crtc->config.dpll.m1; -- GitLab From 625695f8c3383765fd8974616aa57ffdbc644f83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:02 +0300 Subject: [PATCH 0052/1868] drm/i915: Call intel_{dp, hdmi}_prepare for chv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CHV was forgotten when the intel_{dp,hdmi}_prepare() were introduced (or the chv patches were still in flight?). Call these when enabling the ports.
Things tend to work much better when we actually write something to the port registers :) Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 2 ++ drivers/gpu/drm/i915/intel_hdmi.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 16fcfc67f014..33a45a819525 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2281,6 +2281,8 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder) enum pipe pipe = intel_crtc->pipe; u32 val; + intel_dp_prepare(encoder); + mutex_lock(&dev_priv->dpio_lock); /* program left/right clock distribution */ diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index f9151f6641d9..df25b740b348 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1260,6 +1260,8 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder) enum pipe pipe = intel_crtc->pipe; u32 val; + intel_hdmi_prepare(encoder); + mutex_lock(&dev_priv->dpio_lock); /* program left/right clock distribution */ -- GitLab From 1fb44505f6c547742fcbcba4d3999fb324b5f587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:03 +0300 Subject: [PATCH 0053/1868] drm/i915: Clarify CHV swing margin/deemph bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CHV display PHY registers have two swing margin/deemph settings. Make it clear which ones we're using.
Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 8 ++++++-- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- drivers/gpu/drm/i915/intel_hdmi.c | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 697c04976cd3..35553aa0a82f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -855,8 +855,8 @@ enum punit_power_well { #define _VLV_TX_DW2_CH0 0x8288 #define _VLV_TX_DW2_CH1 0x8488 -#define DPIO_SWING_MARGIN_SHIFT 16 -#define DPIO_SWING_MARGIN_MASK (0xff << DPIO_SWING_MARGIN_SHIFT) +#define DPIO_SWING_MARGIN000_SHIFT 16 +#define DPIO_SWING_MARGIN000_MASK (0xff << DPIO_SWING_MARGIN000_SHIFT) #define DPIO_UNIQ_TRANS_SCALE_SHIFT 8 #define VLV_TX_DW2(ch) _PORT(ch, _VLV_TX_DW2_CH0, _VLV_TX_DW2_CH1) @@ -864,12 +864,16 @@ enum punit_power_well { #define _VLV_TX_DW3_CH1 0x848c /* The following bit for CHV phy */ #define DPIO_TX_UNIQ_TRANS_SCALE_EN (1<<27) +#define DPIO_SWING_MARGIN101_SHIFT 16 +#define DPIO_SWING_MARGIN101_MASK (0xff << DPIO_SWING_MARGIN101_SHIFT) #define VLV_TX_DW3(ch) _PORT(ch, _VLV_TX_DW3_CH0, _VLV_TX_DW3_CH1) #define _VLV_TX_DW4_CH0 0x8290 #define _VLV_TX_DW4_CH1 0x8490 #define DPIO_SWING_DEEMPH9P5_SHIFT 24 #define DPIO_SWING_DEEMPH9P5_MASK (0xff << DPIO_SWING_DEEMPH9P5_SHIFT) +#define DPIO_SWING_DEEMPH6P0_SHIFT 16 +#define DPIO_SWING_DEEMPH6P0_MASK (0xff << DPIO_SWING_DEEMPH6P0_SHIFT) #define VLV_TX_DW4(ch) _PORT(ch, _VLV_TX_DW4_CH0, _VLV_TX_DW4_CH1) #define _VLV_TX3_DW4_CH0 0x690 diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 33a45a819525..95b972736733 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2649,8 +2649,8 @@ static uint32_t intel_chv_signal_levels(struct intel_dp *intel_dp) /* Program swing margin */ for (i = 0; i < 4; i++) { val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i)); - val 
&= ~DPIO_SWING_MARGIN_MASK; - val |= margin_reg_value << DPIO_SWING_MARGIN_SHIFT; + val &= ~DPIO_SWING_MARGIN000_MASK; + val |= margin_reg_value << DPIO_SWING_MARGIN000_SHIFT; vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val); } diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index df25b740b348..5f47d359a991 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1431,8 +1431,8 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder) for (i = 0; i < 4; i++) { val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i)); - val &= ~DPIO_SWING_MARGIN_MASK; - val |= 102 << DPIO_SWING_MARGIN_SHIFT; + val &= ~DPIO_SWING_MARGIN000_MASK; + val |= 102 << DPIO_SWING_MARGIN000_SHIFT; vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val); } -- GitLab From 7f0c860533ff2de4b3bb84f71d5ce238fffe4d63 Mon Sep 17 00:00:00 2001 From: Shobhit Kumar Date: Wed, 30 Jul 2014 20:34:57 +0530 Subject: [PATCH 0054/1868] drm/i915: Add support for Video Burst Mode for MIPI DSI v2: Updated the error log as suggested by Imre Signed-off-by: Shobhit Kumar Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_bios.h | 3 +- drivers/gpu/drm/i915/intel_dsi.c | 22 ++++++++----- drivers/gpu/drm/i915/intel_dsi.h | 2 ++ drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 38 ++++++++++++++++++++-- drivers/gpu/drm/i915/intel_dsi_pll.c | 9 ++--- 5 files changed, 57 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h index b98667796337..905999bee2ac 100644 --- a/drivers/gpu/drm/i915/intel_bios.h +++ b/drivers/gpu/drm/i915/intel_bios.h @@ -802,7 +802,8 @@ struct mipi_config { u16 rsvd4; - u8 rsvd5[5]; + u8 rsvd5; + u32 target_burst_mode_freq; u32 dsi_ddr_clk; u32 bridge_ref_clk; diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 670c29a7b5dd..aea8f3383c26 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ 
b/drivers/gpu/drm/i915/intel_dsi.c @@ -423,9 +423,11 @@ static u16 txclkesc(u32 divider, unsigned int us) } /* return pixels in terms of txbyteclkhs */ -static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count) +static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count, + u16 burst_mode_ratio) { - return DIV_ROUND_UP(DIV_ROUND_UP(pixels * bpp, 8), lane_count); + return DIV_ROUND_UP(DIV_ROUND_UP(pixels * bpp * burst_mode_ratio, + 8 * 100), lane_count); } static void set_dsi_timings(struct drm_encoder *encoder, @@ -451,10 +453,12 @@ static void set_dsi_timings(struct drm_encoder *encoder, vbp = mode->vtotal - mode->vsync_end; /* horizontal values are in terms of high speed byte clock */ - hactive = txbyteclkhs(hactive, bpp, lane_count); - hfp = txbyteclkhs(hfp, bpp, lane_count); - hsync = txbyteclkhs(hsync, bpp, lane_count); - hbp = txbyteclkhs(hbp, bpp, lane_count); + hactive = txbyteclkhs(hactive, bpp, lane_count, + intel_dsi->burst_mode_ratio); + hfp = txbyteclkhs(hfp, bpp, lane_count, intel_dsi->burst_mode_ratio); + hsync = txbyteclkhs(hsync, bpp, lane_count, + intel_dsi->burst_mode_ratio); + hbp = txbyteclkhs(hbp, bpp, lane_count, intel_dsi->burst_mode_ratio); I915_WRITE(MIPI_HACTIVE_AREA_COUNT(pipe), hactive); I915_WRITE(MIPI_HFP_COUNT(pipe), hfp); @@ -541,12 +545,14 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder) intel_dsi->video_mode_format == VIDEO_MODE_BURST) { I915_WRITE(MIPI_HS_TX_TIMEOUT(pipe), txbyteclkhs(adjusted_mode->htotal, bpp, - intel_dsi->lane_count) + 1); + intel_dsi->lane_count, + intel_dsi->burst_mode_ratio) + 1); } else { I915_WRITE(MIPI_HS_TX_TIMEOUT(pipe), txbyteclkhs(adjusted_mode->vtotal * adjusted_mode->htotal, - bpp, intel_dsi->lane_count) + 1); + bpp, intel_dsi->lane_count, + intel_dsi->burst_mode_ratio) + 1); } I915_WRITE(MIPI_LP_RX_TIMEOUT(pipe), intel_dsi->lp_rx_timeout); I915_WRITE(MIPI_TURN_AROUND_TIMEOUT(pipe), intel_dsi->turn_arnd_val); diff --git a/drivers/gpu/drm/i915/intel_dsi.h 
b/drivers/gpu/drm/i915/intel_dsi.h index fd51867fd0d3..657eb5c1b9d8 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -116,6 +116,8 @@ struct intel_dsi { u16 clk_hs_to_lp_count; u16 init_count; + u32 pclk; + u16 burst_mode_ratio; /* all delays in ms */ u16 backlight_off_delay; diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 47c7584a4aa0..f6bdd44069ce 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -271,6 +271,8 @@ static bool generic_init(struct intel_dsi_device *dsi) u32 ths_prepare_ns, tclk_trail_ns; u32 tclk_prepare_clkzero, ths_prepare_hszero; u32 lp_to_hs_switch, hs_to_lp_switch; + u32 pclk, computed_ddr; + u16 burst_mode_ratio; DRM_DEBUG_KMS("\n"); @@ -284,8 +286,6 @@ static bool generic_init(struct intel_dsi_device *dsi) else if (intel_dsi->pixel_format == VID_MODE_FORMAT_RGB565) bits_per_pixel = 16; - bitrate = (mode->clock * bits_per_pixel) / intel_dsi->lane_count; - intel_dsi->operation_mode = mipi_config->is_cmd_mode; intel_dsi->video_mode_format = mipi_config->video_transfer_mode; intel_dsi->escape_clk_div = mipi_config->byte_clk_sel; @@ -297,6 +297,40 @@ static bool generic_init(struct intel_dsi_device *dsi) intel_dsi->video_frmt_cfg_bits = mipi_config->bta_enabled ? 
DISABLE_VIDEO_BTA : 0; + pclk = mode->clock; + + /* Burst Mode Ratio + * Target ddr frequency from VBT / non burst ddr freq + * multiply by 100 to preserve remainder + */ + if (intel_dsi->video_mode_format == VIDEO_MODE_BURST) { + if (mipi_config->target_burst_mode_freq) { + computed_ddr = + (pclk * bits_per_pixel) / intel_dsi->lane_count; + + if (mipi_config->target_burst_mode_freq < + computed_ddr) { + DRM_ERROR("Burst mode freq is less than computed\n"); + return false; + } + + burst_mode_ratio = DIV_ROUND_UP( + mipi_config->target_burst_mode_freq * 100, + computed_ddr); + + pclk = DIV_ROUND_UP(pclk * burst_mode_ratio, 100); + } else { + DRM_ERROR("Burst mode target is not set\n"); + return false; + } + } else + burst_mode_ratio = 100; + + intel_dsi->burst_mode_ratio = burst_mode_ratio; + intel_dsi->pclk = pclk; + + bitrate = (pclk * bits_per_pixel) / intel_dsi->lane_count; + switch (intel_dsi->escape_clk_div) { case 0: tlpx_ns = 50; diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index d8bb1ea2f0da..06fad93a68c8 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -134,8 +134,7 @@ static u32 dsi_rr_formula(const struct drm_display_mode *mode, #else /* Get DSI clock from pixel clock */ -static u32 dsi_clk_from_pclk(const struct drm_display_mode *mode, - int pixel_format, int lane_count) +static u32 dsi_clk_from_pclk(u32 pclk, int pixel_format, int lane_count) { u32 dsi_clk_khz; u32 bpp; @@ -156,7 +155,7 @@ static u32 dsi_clk_from_pclk(const struct drm_display_mode *mode, /* DSI data rate = pixel clock * bits per pixel / lane count pixel clock is converted from KHz to Hz */ - dsi_clk_khz = DIV_ROUND_CLOSEST(mode->clock * bpp, lane_count); + dsi_clk_khz = DIV_ROUND_CLOSEST(pclk * bpp, lane_count); return dsi_clk_khz; } @@ -228,14 +227,12 @@ static int dsi_calc_mnp(u32 dsi_clk, struct dsi_mnp *dsi_mnp) static void vlv_configure_dsi_pll(struct intel_encoder *encoder) { struct 
drm_i915_private *dev_priv = encoder->base.dev->dev_private; - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - const struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode; struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); int ret; struct dsi_mnp dsi_mnp; u32 dsi_clk; - dsi_clk = dsi_clk_from_pclk(mode, intel_dsi->pixel_format, + dsi_clk = dsi_clk_from_pclk(intel_dsi->pclk, intel_dsi->pixel_format, intel_dsi->lane_count); ret = dsi_calc_mnp(dsi_clk, &dsi_mnp); -- GitLab From 7f3de8336fc8c44bede43c57e40448171b12ef68 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 30 Jul 2014 22:34:27 +0200 Subject: [PATCH 0055/1868] drm/i915: Align intel_dsi*.c files a bit I'm not really that insisting on checkpatch compliance, but ragged function parameter alignment does get me. Please adjust your editor to just do this for you. Cc: Shobhit Kumar Cc: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dsi.c | 24 ++++++++++++------------ drivers/gpu/drm/i915/intel_dsi_cmd.c | 2 +- drivers/gpu/drm/i915/intel_dsi_pll.c | 8 ++++---- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index aea8f3383c26..5bd9e09ad3c5 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -184,7 +184,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder) /* update the hw state for DPLL */ intel_crtc->config.dpll_hw_state.dpll = DPLL_INTEGRATED_CLOCK_VLV | - DPLL_REFA_CLK_ENABLE_VLV; + DPLL_REFA_CLK_ENABLE_VLV; tmp = I915_READ(DSPCLK_GATE_D); tmp |= DPOUNIT_CLOCK_GATE_DISABLE; @@ -259,8 +259,8 @@ static void intel_dsi_disable(struct intel_encoder *encoder) temp = I915_READ(MIPI_CTRL(pipe)); temp &= ~ESCAPE_CLOCK_DIVIDER_MASK; I915_WRITE(MIPI_CTRL(pipe), temp | - intel_dsi->escape_clk_div << - ESCAPE_CLOCK_DIVIDER_SHIFT); + intel_dsi->escape_clk_div << + ESCAPE_CLOCK_DIVIDER_SHIFT); 
I915_WRITE(MIPI_EOT_DISABLE(pipe), CLOCKSTOP); @@ -297,7 +297,7 @@ static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) usleep_range(2000, 2500); if (wait_for(((I915_READ(MIPI_PORT_CTRL(pipe)) & AFE_LATCHOUT) - == 0x00000), 30)) + == 0x00000), 30)) DRM_ERROR("DSI LP not going Low\n"); val = I915_READ(MIPI_PORT_CTRL(pipe)); @@ -427,7 +427,7 @@ static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count, u16 burst_mode_ratio) { return DIV_ROUND_UP(DIV_ROUND_UP(pixels * bpp * burst_mode_ratio, - 8 * 100), lane_count); + 8 * 100), lane_count); } static void set_dsi_timings(struct drm_encoder *encoder, @@ -454,10 +454,10 @@ static void set_dsi_timings(struct drm_encoder *encoder, /* horizontal values are in terms of high speed byte clock */ hactive = txbyteclkhs(hactive, bpp, lane_count, - intel_dsi->burst_mode_ratio); + intel_dsi->burst_mode_ratio); hfp = txbyteclkhs(hfp, bpp, lane_count, intel_dsi->burst_mode_ratio); hsync = txbyteclkhs(hsync, bpp, lane_count, - intel_dsi->burst_mode_ratio); + intel_dsi->burst_mode_ratio); hbp = txbyteclkhs(hbp, bpp, lane_count, intel_dsi->burst_mode_ratio); I915_WRITE(MIPI_HACTIVE_AREA_COUNT(pipe), hactive); @@ -582,7 +582,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder) * XXX: write MIPI_STOP_STATE_STALL? */ I915_WRITE(MIPI_HIGH_LOW_SWITCH_COUNT(pipe), - intel_dsi->hs_to_lp_count); + intel_dsi->hs_to_lp_count); /* XXX: low power clock equivalence in terms of byte clock. the number * of byte clocks occupied in one low power clock. based on txbyteclkhs @@ -607,10 +607,10 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder) * 64 like 1366 x 768. 
Enable RANDOM resolution support for such * panels by default */ I915_WRITE(MIPI_VIDEO_MODE_FORMAT(pipe), - intel_dsi->video_frmt_cfg_bits | - intel_dsi->video_mode_format | - IP_TG_CONFIG | - RANDOM_DPI_DISPLAY_RESOLUTION); + intel_dsi->video_frmt_cfg_bits | + intel_dsi->video_mode_format | + IP_TG_CONFIG | + RANDOM_DPI_DISPLAY_RESOLUTION); } static void intel_dsi_pre_pll_enable(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_dsi_cmd.c b/drivers/gpu/drm/i915/intel_dsi_cmd.c index 7f1430ac8543..f4767fd2ebeb 100644 --- a/drivers/gpu/drm/i915/intel_dsi_cmd.c +++ b/drivers/gpu/drm/i915/intel_dsi_cmd.c @@ -430,7 +430,7 @@ void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi) u32 mask; mask = LP_CTRL_FIFO_EMPTY | HS_CTRL_FIFO_EMPTY | - LP_DATA_FIFO_EMPTY | HS_DATA_FIFO_EMPTY; + LP_DATA_FIFO_EMPTY | HS_DATA_FIFO_EMPTY; if (wait_for((I915_READ(MIPI_GEN_FIFO_STAT(pipe)) & mask) == mask, 100)) DRM_ERROR("DPI FIFOs are not empty\n"); diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 06fad93a68c8..fa7a6ca34cd6 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -190,7 +190,7 @@ static int dsi_calc_mnp(u32 dsi_clk, struct dsi_mnp *dsi_mnp) for (m = 62; m <= 92; m++) { for (p = 2; p <= 6; p++) { /* Find the optimal m and p divisors - with minimal error +/- the required clock */ + with minimal error +/- the required clock */ calc_dsi_clk = (m * ref_clk) / p; if (calc_dsi_clk == target_dsi_clk) { calc_m = m; @@ -233,7 +233,7 @@ static void vlv_configure_dsi_pll(struct intel_encoder *encoder) u32 dsi_clk; dsi_clk = dsi_clk_from_pclk(intel_dsi->pclk, intel_dsi->pixel_format, - intel_dsi->lane_count); + intel_dsi->lane_count); ret = dsi_calc_mnp(dsi_clk, &dsi_mnp); if (ret) { @@ -315,8 +315,8 @@ static void assert_bpp_mismatch(int pixel_format, int pipe_bpp) } WARN(bpp != pipe_bpp, - "bpp match assertion failure (expected %d, current %d)\n", - bpp, pipe_bpp); + "bpp 
match assertion failure (expected %d, current %d)\n", + bpp, pipe_bpp); } u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp) -- GitLab From da46f936bb0396f6a0fb87c2786f541e9f19a73c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 1 Aug 2014 02:04:45 -0700 Subject: [PATCH 0056/1868] drm/i915: Introduce FBC False Color for debug purposes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this bit enabled, HW changes the color when compressing frames for debug purposes. Although the simple way to enable a single bit is over intel_reg_write, this value is overwritten on next update_fbc so depending on the workload it is not possible to set this bit with intel-gpu-tools. So this patch introduces a persistent way to enable false color over debugfs. v2: Use DEFINE_SIMPLE_ATTRIBUTE as Daniel suggested v3: (Ville) only do false color for IVB+ since according to spec bit is MBZ before IVB. v4: We don't have FBC on valleyview nor on cherryview (Ben) v5: s/!HAS_PCH_SPLIT/!HAS_FBC (Ville) Cc: Ville Syrjälä Reviewed-by: Ben Widawsky Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 42 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 3 +++ 4 files changed, 48 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9e737b771c40..aea1a819c775 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1433,6 +1433,47 @@ static int i915_fbc_status(struct seq_file *m, void *unused) return 0; } +static int i915_fbc_fc_get(void *data, u64 *val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev->dev_private; + + if (INTEL_INFO(dev)->gen < 7 || !HAS_FBC(dev)) + return -ENODEV; + + drm_modeset_lock_all(dev); + *val = dev_priv->fbc.false_color; + drm_modeset_unlock_all(dev); 
+ + return 0; +} + +static int i915_fbc_fc_set(void *data, u64 val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 reg; + + if (INTEL_INFO(dev)->gen < 7 || !HAS_FBC(dev)) + return -ENODEV; + + drm_modeset_lock_all(dev); + + reg = I915_READ(ILK_DPFC_CONTROL); + dev_priv->fbc.false_color = val; + + I915_WRITE(ILK_DPFC_CONTROL, val ? + (reg | FBC_CTL_FALSE_COLOR) : + (reg & ~FBC_CTL_FALSE_COLOR)); + + drm_modeset_unlock_all(dev); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_fc_fops, + i915_fbc_fc_get, i915_fbc_fc_set, + "%llu\n"); + static int i915_ips_status(struct seq_file *m, void *unused) { struct drm_info_node *node = m->private; @@ -3957,6 +3998,7 @@ static const struct i915_debugfs_files { {"i915_pri_wm_latency", &i915_pri_wm_latency_fops}, {"i915_spr_wm_latency", &i915_spr_wm_latency_fops}, {"i915_cur_wm_latency", &i915_cur_wm_latency_fops}, + {"i915_fbc_false_color", &i915_fbc_fc_fops}, }; void intel_display_crc_init(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c3beb08813c6..ce30bb3ed566 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -637,6 +637,8 @@ struct i915_fbc { struct drm_mm_node compressed_fb; struct drm_mm_node *compressed_llb; + bool false_color; + struct intel_fbc_work { struct delayed_work work; struct drm_crtc *crtc; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 35553aa0a82f..ca37febbc2ae 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1540,6 +1540,7 @@ enum punit_power_well { /* Framebuffer compression for Ironlake */ #define ILK_DPFC_CB_BASE 0x43200 #define ILK_DPFC_CONTROL 0x43208 +#define FBC_CTL_FALSE_COLOR (1<<10) /* The bit 28-8 is reserved */ #define DPFC_RESERVED (0x1FFFFF00) #define ILK_DPFC_RECOMP_CTL 0x4320c diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 
a318cd5ad8ed..ab80df2909e0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -309,6 +309,9 @@ static void gen7_enable_fbc(struct drm_crtc *crtc) dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; + if (dev_priv->fbc.false_color) + dpfc_ctl |= FBC_CTL_FALSE_COLOR; + I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); if (IS_IVYBRIDGE(dev)) { -- GitLab From ce4dd49e97813740bd2b03ecdc51521be10f3bf1 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 1 Aug 2014 11:07:54 +0100 Subject: [PATCH 0057/1868] drm/i915: Gather the HDMI level shifter logic into one place The knowledge about the HDMI/DVI DDI translation table was scattered around. - info->hdmi_level_shift was initialized with 6, the index of the 800 mV, 0dB translation - A check on the VBT value was done to ensure it wasn't overflowing the translation table (< 0xC) - The actual programming was done in intel_ddi.c As we need to change that knowledge for Broadwell, let's gather everything into one place. Signed-off-by: Damien Lespiau Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ drivers/gpu/drm/i915/intel_bios.c | 13 +++++-------- drivers/gpu/drm/i915/intel_ddi.c | 14 +++++++++++++- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ce30bb3ed566..0d7e55f2a8a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1232,6 +1232,12 @@ enum modeset_restore { }; struct ddi_vbt_port_info { + /* + * This is an index in the HDMI/DVI DDI buffer translation table. + * The special value HDMI_LEVEL_SHIFT_UNKNOWN means the VBT didn't + * populate this field. 
+ */ +#define HDMI_LEVEL_SHIFT_UNKNOWN 0xff uint8_t hdmi_level_shift; uint8_t supports_dvi:1; diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index a66955037e4e..031c5657255d 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -976,12 +976,10 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, if (bdb->version >= 158) { /* The VBT HDMI level shift values match the table we have. */ hdmi_level_shift = child->raw[7] & 0xF; - if (hdmi_level_shift < 0xC) { - DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n", - port_name(port), - hdmi_level_shift); - info->hdmi_level_shift = hdmi_level_shift; - } + DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n", + port_name(port), + hdmi_level_shift); + info->hdmi_level_shift = hdmi_level_shift; } } @@ -1114,8 +1112,7 @@ init_vbt_defaults(struct drm_i915_private *dev_priv) struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; - /* Recommended BSpec default: 800mV 0dB. 
*/ - info->hdmi_level_shift = 6; + info->hdmi_level_shift = HDMI_LEVEL_SHIFT_UNKNOWN; info->supports_dvi = (port != PORT_A && port != PORT_E); info->supports_hdmi = info->supports_dvi; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 3634575534b4..3af8340b21ec 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -145,7 +145,7 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) { struct drm_i915_private *dev_priv = dev->dev_private; u32 reg; - int i; + int i, n_hdmi_entries, hdmi_800mV_0dB; int hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; const u32 *ddi_translations_fdi; const u32 *ddi_translations_dp; @@ -156,15 +156,21 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; ddi_translations_edp = bdw_ddi_translations_edp; + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + hdmi_800mV_0dB = 6; } else if (IS_HASWELL(dev)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; ddi_translations_edp = hsw_ddi_translations_dp; + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + hdmi_800mV_0dB = 6; } else { WARN(1, "ddi translation table missing\n"); ddi_translations_edp = bdw_ddi_translations_dp; ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + hdmi_800mV_0dB = 6; } switch (port) { @@ -193,6 +199,12 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) I915_WRITE(reg, ddi_translations[i]); reg += 4; } + + /* Choose a good default if VBT is badly populated */ + if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || + hdmi_level >= n_hdmi_entries) + hdmi_level = hdmi_800mV_0dB; + /* Entry 9 is for HDMI: */ for (i = 0; i < 2; i++) { I915_WRITE(reg, 
hsw_ddi_translations_hdmi[hdmi_level * 2 + i]); -- GitLab From a26aa8baee6c274fc23efccf46e891e63c8d0a30 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 1 Aug 2014 11:07:55 +0100 Subject: [PATCH 0058/1868] drm/i915/bdw: Provide the BDW specific HDMI buffer translation table Among the changes, the table has only 10 entries instead of 12 on HSW and the index of the 800mV/0dB entry has changed. Signed-off-by: Damien Lespiau Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 3af8340b21ec..75ac0b29aa3e 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -111,6 +111,20 @@ static const u32 bdw_ddi_translations_fdi[] = { 0x00FFFFFF, 0x00140006 /* HDMI parameters 800mV 0dB*/ }; +static const u32 bdw_ddi_translations_hdmi[] = { + /* Idx NT mV diff T mV diff db */ + 0x00FFFFFF, 0x0007000E, /* 0: 400 400 0 */ + 0x00D75FFF, 0x000E000A, /* 1: 400 600 3.5 */ + 0x00BEFFFF, 0x00140006, /* 2: 400 800 6 */ + 0x00FFFFFF, 0x0009000D, /* 3: 450 450 0 */ + 0x00FFFFFF, 0x000E000A, /* 4: 600 600 0 */ + 0x00D7FFFF, 0x00140006, /* 5: 600 800 2.5 */ + 0x80CB2FFF, 0x001B0002, /* 6: 600 1000 4.5 */ + 0x00FFFFFF, 0x00140006, /* 7: 800 800 0 */ + 0x80E79FFF, 0x001B0002, /* 8: 800 1000 2 */ + 0x80FFFFFF, 0x001B0002, /* 9: 1000 1000 0 */ +}; + enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder) { struct drm_encoder *encoder = &intel_encoder->base; @@ -150,18 +164,21 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) const u32 *ddi_translations_fdi; const u32 *ddi_translations_dp; const u32 *ddi_translations_edp; + const u32 *ddi_translations_hdmi; const u32 *ddi_translations; if (IS_BROADWELL(dev)) { ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; 
ddi_translations_edp = bdw_ddi_translations_edp; - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - hdmi_800mV_0dB = 6; + ddi_translations_hdmi = bdw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_800mV_0dB = 7; } else if (IS_HASWELL(dev)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; ddi_translations_edp = hsw_ddi_translations_dp; + ddi_translations_hdmi = hsw_ddi_translations_hdmi; n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); hdmi_800mV_0dB = 6; } else { @@ -169,8 +186,9 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) ddi_translations_edp = bdw_ddi_translations_dp; ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - hdmi_800mV_0dB = 6; + ddi_translations_hdmi = bdw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_800mV_0dB = 7; } switch (port) { @@ -207,7 +225,7 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) /* Entry 9 is for HDMI: */ for (i = 0; i < 2; i++) { - I915_WRITE(reg, hsw_ddi_translations_hdmi[hdmi_level * 2 + i]); + I915_WRITE(reg, ddi_translations_hdmi[hdmi_level * 2 + i]); reg += 4; } } -- GitLab From 156ae28c9f327d2c026e91cfacb5e224bb760d66 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 1 Aug 2014 11:07:56 +0100 Subject: [PATCH 0059/1868] drm/i915/bdw: Remove the HDMI/DVI entry from the DP/eDP/FDI tables We always write entries 0 to 8 from the DDI translation tables and then entry 9 for HDMI/DVI with the help of the VBT. We then don't need the failsafe HDMI entry in the DP/eDP/FDI tables. 
Signed-off-by: Damien Lespiau Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 75ac0b29aa3e..694e1c6118aa 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -82,7 +82,6 @@ static const u32 bdw_ddi_translations_edp[] = { 0x00BEEFFF, 0x000A000C, 0x00FFFFFF, 0x0005000F, 0x00DB6FFF, 0x000A000C, - 0x00FFFFFF, 0x00140006 /* HDMI parameters 800mV 0dB*/ }; static const u32 bdw_ddi_translations_dp[] = { @@ -95,7 +94,6 @@ static const u32 bdw_ddi_translations_dp[] = { 0x80CB2FFF, 0x001B0002, 0x00F7DFFF, 0x00180004, 0x80D75FFF, 0x001B0002, - 0x00FFFFFF, 0x00140006 /* HDMI parameters 800mV 0dB*/ }; static const u32 bdw_ddi_translations_fdi[] = { @@ -108,7 +106,6 @@ static const u32 bdw_ddi_translations_fdi[] = { 0x00C30FFF, 0x000C0000, 0x00FFFFFF, 0x00070006, 0x00D75FFF, 0x000C0000, - 0x00FFFFFF, 0x00140006 /* HDMI parameters 800mV 0dB*/ }; static const u32 bdw_ddi_translations_hdmi[] = { -- GitLab From ac921bdde92a354e8c59ea185dff26dc2611ee81 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 1 Aug 2014 11:07:57 +0100 Subject: [PATCH 0060/1868] drm/i915: Remove now useless comments about the translation values We used to carry a default HDMI value in entry 9, but this entry got removed for both HSW and BDW. 
Signed-off-by: Damien Lespiau Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 694e1c6118aa..ca1f9a8a7d03 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -33,7 +33,7 @@ * automatically adapt to HDMI connections as well */ static const u32 hsw_ddi_translations_dp[] = { - 0x00FFFFFF, 0x0006000E, /* DP parameters */ + 0x00FFFFFF, 0x0006000E, 0x00D75FFF, 0x0005000A, 0x00C30FFF, 0x00040006, 0x80AAAFFF, 0x000B0000, @@ -45,7 +45,7 @@ static const u32 hsw_ddi_translations_dp[] = { }; static const u32 hsw_ddi_translations_fdi[] = { - 0x00FFFFFF, 0x0007000E, /* FDI parameters */ + 0x00FFFFFF, 0x0007000E, 0x00D75FFF, 0x000F000A, 0x00C30FFF, 0x00060006, 0x00AAAFFF, 0x001E0000, @@ -73,7 +73,7 @@ static const u32 hsw_ddi_translations_hdmi[] = { }; static const u32 bdw_ddi_translations_edp[] = { - 0x00FFFFFF, 0x00000012, /* eDP parameters */ + 0x00FFFFFF, 0x00000012, 0x00EBAFFF, 0x00020011, 0x00C71FFF, 0x0006000F, 0x00AAAFFF, 0x000E000A, @@ -85,7 +85,7 @@ static const u32 bdw_ddi_translations_edp[] = { }; static const u32 bdw_ddi_translations_dp[] = { - 0x00FFFFFF, 0x0007000E, /* DP parameters */ + 0x00FFFFFF, 0x0007000E, 0x00D75FFF, 0x000E000A, 0x00BEFFFF, 0x00140006, 0x80B2CFFF, 0x001B0002, @@ -97,7 +97,7 @@ static const u32 bdw_ddi_translations_dp[] = { }; static const u32 bdw_ddi_translations_fdi[] = { - 0x00FFFFFF, 0x0001000E, /* FDI parameters */ + 0x00FFFFFF, 0x0001000E, 0x00D75FFF, 0x0004000A, 0x00C30FFF, 0x00070006, 0x00AAAFFF, 0x000C0000, -- GitLab From 7fad3594bf2c66843c7d1e09005ff845a94524ab Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 1 Aug 2014 16:19:54 -0300 Subject: [PATCH 0061/1868] drm/i915: remove duplicate register defines cat i915_reg.h | sort | uniq -d | grep define Signed-off-by: Paulo Zanoni Signed-off-by: Daniel 
Vetter --- drivers/gpu/drm/i915/i915_reg.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ca37febbc2ae..7f9a81087ebc 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3756,7 +3756,6 @@ enum punit_power_well { #define PIPE_VSYNC_INTERRUPT_STATUS (1UL<<9) #define PIPE_DISPLAY_LINE_COMPARE_STATUS (1UL<<8) #define PIPE_DPST_EVENT_STATUS (1UL<<7) -#define PIPE_LEGACY_BLC_EVENT_STATUS (1UL<<6) #define PIPE_A_PSR_STATUS_VLV (1UL<<6) #define PIPE_LEGACY_BLC_EVENT_STATUS (1UL<<6) #define PIPE_ODD_FIELD_INTERRUPT_STATUS (1UL<<5) @@ -5431,7 +5430,6 @@ enum punit_power_well { #define VLV_GTLC_ALLOWWAKEERR (1 << 1) #define VLV_GTLC_PW_MEDIA_STATUS_MASK (1 << 5) #define VLV_GTLC_PW_RENDER_STATUS_MASK (1 << 7) -#define VLV_GTLC_SURVIVABILITY_REG 0x130098 #define FORCEWAKE_MT 0xa188 /* multi-threaded */ #define FORCEWAKE_KERNEL 0x1 #define FORCEWAKE_USER 0x2 -- GitLab From 4079b8d1c3e38b6f18fb31e2997fa25276feea07 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 5 Aug 2014 10:39:42 +0100 Subject: [PATCH 0062/1868] drm/i915: Demote the DRRS messages to debug messages While those messages are interesting, they aren't _that_ interesting. We don't need them in the kernel logs by default. 
Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 95b972736733..3b88255d87dc 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4429,7 +4429,7 @@ intel_dp_drrs_init(struct intel_digital_port *intel_dig_port, } if (dev_priv->vbt.drrs_type != SEAMLESS_DRRS_SUPPORT) { - DRM_INFO("VBT doesn't support DRRS\n"); + DRM_DEBUG_KMS("VBT doesn't support DRRS\n"); return NULL; } @@ -4437,7 +4437,7 @@ intel_dp_drrs_init(struct intel_digital_port *intel_dig_port, (dev, fixed_mode, connector); if (!downclock_mode) { - DRM_INFO("DRRS not supported\n"); + DRM_DEBUG_KMS("DRRS not supported\n"); return NULL; } @@ -4448,7 +4448,7 @@ intel_dp_drrs_init(struct intel_digital_port *intel_dig_port, intel_dp->drrs_state.type = dev_priv->vbt.drrs_type; intel_dp->drrs_state.refresh_rate_type = DRRS_HIGH_RR; - DRM_INFO("seamless DRRS supported for eDP panel.\n"); + DRM_DEBUG_KMS("seamless DRRS supported for eDP panel.\n"); return downclock_mode; } -- GitLab From b2784e151903628a086d2ee12cf943690216cd6c Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 5 Aug 2014 11:29:37 +0100 Subject: [PATCH 0063/1868] drm/i915: Introduce a for_each_intel_encoder() macro Following the established idiom, let's provide a macro to iterate through the encoders. spatch helps, once more, for the substitution: @@ iterator name list_for_each_entry; iterator name for_each_intel_encoder; struct intel_encoder * encoder; struct drm_device * dev; @@ -list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { +for_each_intel_encoder(dev, encoder) { ... } I also modified a few call sites by hand where a pointer to mode_config was directly used (to avoid overflowing 80 chars). 
Signed-off-by: Damien Lespiau [danvet: Wrap parameters correctly in the macro and remove spurious space checkpatch noticed.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 3 +- drivers/gpu/drm/i915/i915_drv.h | 5 +++ drivers/gpu/drm/i915/i915_irq.c | 8 ++-- drivers/gpu/drm/i915/intel_display.c | 59 ++++++++++------------------ drivers/gpu/drm/i915/intel_hdmi.c | 2 +- drivers/gpu/drm/i915/intel_lvds.c | 3 +- 6 files changed, 31 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index aea1a819c775..330caa1ab9f9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2708,8 +2708,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_device *dev, enum pipe pipe, *source = INTEL_PIPE_CRC_SOURCE_PIPE; drm_modeset_lock_all(dev); - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { if (!encoder->base.crtc) continue; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0d7e55f2a8a2..73d2308e2377 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -171,6 +171,11 @@ enum hpd_pin { #define for_each_intel_crtc(dev, intel_crtc) \ list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head) +#define for_each_intel_encoder(dev, intel_encoder) \ + list_for_each_entry(intel_encoder, \ + &(dev)->mode_config.encoder_list, \ + base.head) + #define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \ list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \ if ((intel_encoder)->base.crtc == (__crtc)) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 379cfb5dc731..87abe8679495 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3522,18 +3522,17 @@ static void cherryview_irq_preinstall(struct drm_device *dev) static void 
ibx_hpd_irq_setup(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_mode_config *mode_config = &dev->mode_config; struct intel_encoder *intel_encoder; u32 hotplug_irqs, hotplug, enabled_irqs = 0; if (HAS_PCH_IBX(dev)) { hotplug_irqs = SDE_HOTPLUG_MASK; - list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head) + for_each_intel_encoder(dev, intel_encoder) if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED) enabled_irqs |= hpd_ibx[intel_encoder->hpd_pin]; } else { hotplug_irqs = SDE_HOTPLUG_MASK_CPT; - list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head) + for_each_intel_encoder(dev, intel_encoder) if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED) enabled_irqs |= hpd_cpt[intel_encoder->hpd_pin]; } @@ -4452,7 +4451,6 @@ static int i965_irq_postinstall(struct drm_device *dev) static void i915_hpd_irq_setup(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_mode_config *mode_config = &dev->mode_config; struct intel_encoder *intel_encoder; u32 hotplug_en; @@ -4463,7 +4461,7 @@ static void i915_hpd_irq_setup(struct drm_device *dev) hotplug_en &= ~HOTPLUG_INT_EN_MASK; /* Note HDMI and DP share hotplug bits */ /* enable bits are the same for all generations */ - list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head) + for_each_intel_encoder(dev, intel_encoder) if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED) hotplug_en |= hpd_mask_i915[intel_encoder->hpd_pin]; /* Programming the CRT detection parameters tends diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 60ba6962026b..f32a94544940 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6405,7 +6405,6 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, static void ironlake_init_pch_refclk(struct drm_device *dev) { struct 
drm_i915_private *dev_priv = dev->dev_private; - struct drm_mode_config *mode_config = &dev->mode_config; struct intel_encoder *encoder; u32 val, final; bool has_lvds = false; @@ -6415,8 +6414,7 @@ static void ironlake_init_pch_refclk(struct drm_device *dev) bool can_ssc = false; /* We need to take the global config into account */ - list_for_each_entry(encoder, &mode_config->encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { switch (encoder->type) { case INTEL_OUTPUT_LVDS: has_panel = true; @@ -6723,11 +6721,10 @@ static void lpt_disable_clkout_dp(struct drm_device *dev) static void lpt_init_pch_refclk(struct drm_device *dev) { - struct drm_mode_config *mode_config = &dev->mode_config; struct intel_encoder *encoder; bool has_vga = false; - list_for_each_entry(encoder, &mode_config->encoder_list, base.head) { + for_each_intel_encoder(dev, encoder) { switch (encoder->type) { case INTEL_OUTPUT_ANALOG: has_vga = true; @@ -9902,8 +9899,7 @@ static void intel_modeset_update_staged_output_state(struct drm_device *dev) to_intel_encoder(connector->base.encoder); } - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { encoder->new_crtc = to_intel_crtc(encoder->base.crtc); } @@ -9934,8 +9930,7 @@ static void intel_modeset_commit_output_state(struct drm_device *dev) connector->base.encoder = &connector->new_encoder->base; } - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { encoder->base.crtc = &encoder->new_crtc->base; } @@ -10105,8 +10100,7 @@ static bool check_single_encoder_cloning(struct intel_crtc *crtc, struct drm_device *dev = crtc->base.dev; struct intel_encoder *source_encoder; - list_for_each_entry(source_encoder, - &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, source_encoder) { if (source_encoder->new_crtc != crtc) continue; @@ -10122,8 +10116,7 @@ static bool 
check_encoder_cloning(struct intel_crtc *crtc) struct drm_device *dev = crtc->base.dev; struct intel_encoder *encoder; - list_for_each_entry(encoder, - &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->new_crtc != crtc) continue; @@ -10207,8 +10200,7 @@ intel_modeset_pipe_config(struct drm_crtc *crtc, * adjust it according to limitations or connector properties, and also * a chance to reject the mode entirely. */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { if (&encoder->new_crtc->base != crtc) continue; @@ -10286,8 +10278,7 @@ intel_modeset_affected_pipes(struct drm_crtc *crtc, unsigned *modeset_pipes, 1 << connector->new_encoder->new_crtc->pipe; } - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->base.crtc == &encoder->new_crtc->base) continue; @@ -10361,8 +10352,7 @@ intel_modeset_update_state(struct drm_device *dev, unsigned prepare_pipes) struct intel_crtc *intel_crtc; struct drm_connector *connector; - list_for_each_entry(intel_encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, intel_encoder) { if (!intel_encoder->base.crtc) continue; @@ -10636,8 +10626,7 @@ check_encoder_state(struct drm_device *dev) struct intel_encoder *encoder; struct intel_connector *connector; - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { bool enabled = false; bool active = false; enum pipe pipe, tracked_pipe; @@ -10716,8 +10705,7 @@ check_crtc_state(struct drm_device *dev) WARN(crtc->active && !crtc->base.enabled, "active crtc, but not enabled in sw tracking\n"); - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->base.crtc != &crtc->base) continue; enabled = true; @@ -10739,8 +10727,7 @@ 
check_crtc_state(struct drm_device *dev) if (crtc->pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) active = crtc->active; - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { enum pipe pipe; if (encoder->base.crtc != &crtc->base) continue; @@ -11108,7 +11095,7 @@ static void intel_set_config_restore_state(struct drm_device *dev, } count = 0; - list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, encoder) { encoder->new_crtc = to_intel_crtc(config->save_encoder_crtcs[count++]); } @@ -11267,8 +11254,7 @@ intel_modeset_stage_output_state(struct drm_device *dev, } /* Check for any encoders that needs to be disabled. */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { int num_connectors = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, @@ -11301,9 +11287,7 @@ intel_modeset_stage_output_state(struct drm_device *dev, for_each_intel_crtc(dev, crtc) { crtc->new_enabled = false; - list_for_each_entry(encoder, - &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->new_crtc == crtc) { crtc->new_enabled = true; break; @@ -11340,7 +11324,7 @@ static void disable_crtc_nofb(struct intel_crtc *crtc) connector->new_encoder = NULL; } - list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->new_crtc == crtc) encoder->new_crtc = NULL; } @@ -11972,8 +11956,7 @@ static int intel_encoder_clones(struct intel_encoder *encoder) int index_mask = 0; int entry = 0; - list_for_each_entry(source_encoder, - &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, source_encoder) { if (encoders_cloneable(encoder, source_encoder)) index_mask |= (1 << entry); @@ -12162,7 +12145,7 @@ static void intel_setup_outputs(struct drm_device *dev) 
intel_edp_psr_init(dev); - list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, encoder) { encoder->base.possible_crtcs = encoder->crtc_mask; encoder->base.possible_clones = intel_encoder_clones(encoder); @@ -13056,8 +13039,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS); } - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { pipe = 0; if (encoder->get_hw_state(encoder, &pipe)) { @@ -13121,8 +13103,7 @@ void intel_modeset_setup_hw_state(struct drm_device *dev, } /* HW state is read out, now we need to sanitize this mess. */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { intel_sanitize_encoder(encoder); } diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 5f47d359a991..9169786dbbc3 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -885,7 +885,7 @@ static bool hdmi_12bpc_possible(struct intel_crtc *crtc) if (HAS_GMCH_DISPLAY(dev)) return false; - list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->new_crtc != crtc) continue; diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 881361c0f27e..1987491723a5 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -823,8 +823,7 @@ bool intel_is_dual_link_lvds(struct drm_device *dev) struct intel_encoder *encoder; struct intel_lvds_encoder *lvds_encoder; - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->type == INTEL_OUTPUT_LVDS) { lvds_encoder = to_lvds_encoder(&encoder->base); -- GitLab From 76eebda727c76b5712f6ce75a45a9917d3873a37 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Aug 2014 11:26:52 +0530 Subject: [PATCH 0064/1868] drm/i915: Add 180 degree sprite rotation support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sprite planes (in fact all display planes starting from gen4) support 180 degree rotation. Add the relevant low level bits to the sprite code to make use of that feature. The upper layers are not yet plugged in. v2: HSW handles the rotated buffer offset automagically v3: BDW also handles the rotated buffer offset automagically Testcase: igt/kms_rotation_crc Cc: dri-devel@lists.freedesktop.org Signed-off-by: Ville Syrjälä Signed-off-by: Sagar Kamble Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_sprite.c | 38 +++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7f9a81087ebc..5ebac620bbcd 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4218,6 +4218,7 @@ enum punit_power_well { #define DVS_YUV_ORDER_UYVY (1<<16) #define DVS_YUV_ORDER_YVYU (2<<16) #define DVS_YUV_ORDER_VYUY (3<<16) +#define DVS_ROTATE_180 (1<<15) #define DVS_DEST_KEY (1<<2) #define DVS_TRICKLE_FEED_DISABLE (1<<14) #define DVS_TILED (1<<10) @@ -4288,6 +4289,7 @@ enum punit_power_well { #define SPRITE_YUV_ORDER_UYVY (1<<16) #define SPRITE_YUV_ORDER_YVYU (2<<16) #define SPRITE_YUV_ORDER_VYUY (3<<16) +#define SPRITE_ROTATE_180 (1<<15) #define SPRITE_TRICKLE_FEED_DISABLE (1<<14) #define SPRITE_INT_GAMMA_ENABLE (1<<13) #define SPRITE_TILED (1<<10) @@ -4361,6 +4363,7 @@ enum punit_power_well { #define SP_YUV_ORDER_UYVY (1<<16) #define SP_YUV_ORDER_YVYU (2<<16) #define SP_YUV_ORDER_VYUY (3<<16) +#define SP_ROTATE_180 (1<<15) #define SP_TILED (1<<10) #define _SPALINOFF (VLV_DISPLAY_BASE + 0x72184) #define _SPASTRIDE 
(VLV_DISPLAY_BASE + 0x72188) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3198de3007be..b63df4416301 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -448,6 +448,7 @@ struct intel_plane { unsigned int crtc_w, crtc_h; uint32_t src_x, src_y; uint32_t src_w, src_h; + unsigned int rotation; /* Since we need to change the watermarks before/after * enabling/disabling the planes, we need to store the parameters here diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index d34a5696ffb6..f4d10c4b2781 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -164,6 +164,7 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc, sprctl &= ~SP_PIXFORMAT_MASK; sprctl &= ~SP_YUV_BYTE_ORDER_MASK; sprctl &= ~SP_TILED; + sprctl &= ~SP_ROTATE_180; switch (fb->pixel_format) { case DRM_FORMAT_YUYV: @@ -236,6 +237,14 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc, fb->pitches[0]); linear_offset -= sprsurf_offset; + if (intel_plane->rotation == BIT(DRM_ROTATE_180)) { + sprctl |= SP_ROTATE_180; + + x += src_w; + y += src_h; + linear_offset += src_h * fb->pitches[0] + src_w * pixel_size; + } + atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count); intel_update_primary_plane(intel_crtc); @@ -365,6 +374,7 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, sprctl &= ~SPRITE_RGB_ORDER_RGBX; sprctl &= ~SPRITE_YUV_BYTE_ORDER_MASK; sprctl &= ~SPRITE_TILED; + sprctl &= ~SPRITE_ROTATE_180; switch (fb->pixel_format) { case DRM_FORMAT_XBGR8888: @@ -427,6 +437,18 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, pixel_size, fb->pitches[0]); linear_offset -= sprsurf_offset; + if (intel_plane->rotation == BIT(DRM_ROTATE_180)) { + sprctl |= SPRITE_ROTATE_180; + + /* HSW and BDW does this automagically in hardware */ + if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) { + x += 
src_w; + y += src_h; + linear_offset += src_h * fb->pitches[0] + + src_w * pixel_size; + } + } + atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count); intel_update_primary_plane(intel_crtc); @@ -572,6 +594,7 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, dvscntr &= ~DVS_RGB_ORDER_XBGR; dvscntr &= ~DVS_YUV_BYTE_ORDER_MASK; dvscntr &= ~DVS_TILED; + dvscntr &= ~DVS_ROTATE_180; switch (fb->pixel_format) { case DRM_FORMAT_XBGR8888: @@ -629,6 +652,14 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, pixel_size, fb->pitches[0]); linear_offset -= dvssurf_offset; + if (intel_plane->rotation == BIT(DRM_ROTATE_180)) { + dvscntr |= DVS_ROTATE_180; + + x += src_w; + y += src_h; + linear_offset += src_h * fb->pitches[0] + src_w * pixel_size; + } + atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count); intel_update_primary_plane(intel_crtc); @@ -896,6 +927,9 @@ intel_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, max_scale = intel_plane->max_downscale << 16; min_scale = intel_plane->can_scale ? 
1 : (1 << 16); + drm_rect_rotate(&src, fb->width << 16, fb->height << 16, + intel_plane->rotation); + hscale = drm_rect_calc_hscale_relaxed(&src, &dst, min_scale, max_scale); BUG_ON(hscale < 0); @@ -934,6 +968,9 @@ intel_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, drm_rect_width(&dst) * hscale - drm_rect_width(&src), drm_rect_height(&dst) * vscale - drm_rect_height(&src)); + drm_rect_rotate_inv(&src, fb->width << 16, fb->height << 16, + intel_plane->rotation); + /* sanity check to make sure the src viewport wasn't enlarged */ WARN_ON(src.x1 < (int) src_x || src.y1 < (int) src_y || @@ -1311,6 +1348,7 @@ intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane) intel_plane->pipe = pipe; intel_plane->plane = plane; + intel_plane->rotation = BIT(DRM_ROTATE_0); possible_crtcs = (1 << pipe); ret = drm_plane_init(dev, &intel_plane->base, possible_crtcs, &intel_plane_funcs, -- GitLab From e57465f35192246b6587c3bc89b5ed96a8fdfb00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Aug 2014 11:26:53 +0530 Subject: [PATCH 0065/1868] drm/i915: Make intel_plane_restore() return an error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Propagate the error from intel_update_plane() up through intel_plane_restore() to the caller. This will be used for rollback purposes when setting properties fails. 
Cc: dri-devel@lists.freedesktop.org Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_sprite.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index b63df4416301..79782094c8f9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1086,7 +1086,7 @@ bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob); int intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane); void intel_flush_primary_plane(struct drm_i915_private *dev_priv, enum plane plane); -void intel_plane_restore(struct drm_plane *plane); +int intel_plane_restore(struct drm_plane *plane); void intel_plane_disable(struct drm_plane *plane); int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index f4d10c4b2781..611826209c99 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -1218,18 +1218,18 @@ int intel_sprite_get_colorkey(struct drm_device *dev, void *data, return ret; } -void intel_plane_restore(struct drm_plane *plane) +int intel_plane_restore(struct drm_plane *plane) { struct intel_plane *intel_plane = to_intel_plane(plane); if (!plane->crtc || !plane->fb) - return; + return 0; - intel_update_plane(plane, plane->crtc, plane->fb, - intel_plane->crtc_x, intel_plane->crtc_y, - intel_plane->crtc_w, intel_plane->crtc_h, - intel_plane->src_x, intel_plane->src_y, - intel_plane->src_w, intel_plane->src_h); + return intel_update_plane(plane, plane->crtc, plane->fb, + intel_plane->crtc_x, intel_plane->crtc_y, + intel_plane->crtc_w, intel_plane->crtc_h, + intel_plane->src_x, intel_plane->src_y, + intel_plane->src_w, intel_plane->src_h); } void intel_plane_disable(struct 
drm_plane *plane) -- GitLab From 2a297cce2e775812e9d6ca84c3ab92cee5c38e25 Mon Sep 17 00:00:00 2001 From: Sonika Jindal Date: Tue, 5 Aug 2014 11:26:54 +0530 Subject: [PATCH 0066/1868] drm: Add rotation_property to mode_config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sonika Jindal Reviewed-by: Ville Syrjälä Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- include/drm/drm_crtc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index f1105d0da059..62f73bdbcc47 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -821,6 +821,7 @@ struct drm_mode_config { struct drm_property *dpms_property; struct drm_property *path_property; struct drm_property *plane_type_property; + struct drm_property *rotation_property; /* DVI-I properties */ struct drm_property *dvi_i_subconnector_property; -- GitLab From 7ed6eeeec60c98ec824e27a5052aa249d98bd63a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Aug 2014 11:26:55 +0530 Subject: [PATCH 0067/1868] drm/i915: Add rotation property for sprites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sprite planes support 180 degree rotation. The lower layers are now in place, so hook in the standard rotation property to expose the feature to the users. 
v2: Moving rotation_property to mode_config Cc: dri-devel@lists.freedesktop.org Signed-off-by: Ville Syrjälä Signed-off-by: Sonika Jindal Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_sprite.c | 41 ++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 611826209c99..0bdb00b7c59c 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -1218,6 +1218,30 @@ int intel_sprite_get_colorkey(struct drm_device *dev, void *data, return ret; } +static int intel_plane_set_property(struct drm_plane *plane, + struct drm_property *prop, + uint64_t val) +{ + struct drm_device *dev = plane->dev; + struct intel_plane *intel_plane = to_intel_plane(plane); + uint64_t old_val; + int ret = -ENOENT; + + if (prop == dev->mode_config.rotation_property) { + /* exactly one rotation angle please */ + if (hweight32(val & 0xf) != 1) + return -EINVAL; + + old_val = intel_plane->rotation; + intel_plane->rotation = val; + ret = intel_plane_restore(plane); + if (ret) + intel_plane->rotation = old_val; + } + + return ret; +} + int intel_plane_restore(struct drm_plane *plane) { struct intel_plane *intel_plane = to_intel_plane(plane); @@ -1244,6 +1268,7 @@ static const struct drm_plane_funcs intel_plane_funcs = { .update_plane = intel_update_plane, .disable_plane = intel_disable_plane, .destroy = intel_destroy_plane, + .set_property = intel_plane_set_property, }; static uint32_t ilk_plane_formats[] = { @@ -1354,8 +1379,22 @@ intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane) &intel_plane_funcs, plane_formats, num_plane_formats, false); - if (ret) + if (ret) { kfree(intel_plane); + goto out; + } + + if (!dev->mode_config.rotation_property) + dev->mode_config.rotation_property = + drm_mode_create_rotation_property(dev, + BIT(DRM_ROTATE_0) | + BIT(DRM_ROTATE_180)); + + if (dev->mode_config.rotation_property) + 
drm_object_attach_property(&intel_plane->base.base, + dev->mode_config.rotation_property, + intel_plane->rotation); + out: return ret; } -- GitLab From 9783de20967a59d7627772bf77fc8066c47bef79 Mon Sep 17 00:00:00 2001 From: Sonika Jindal Date: Tue, 5 Aug 2014 11:26:57 +0530 Subject: [PATCH 0068/1868] drm: Resetting rotation property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reset rotation property to 0. v2: Resetting after disabling the plane Signed-off-by: Sonika Jindal Reviewed-by: Ville Syrjälä Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_fb_helper.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 3144db9dc0f1..d139eddb3d61 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -345,10 +345,17 @@ static bool restore_fbdev_mode(struct drm_fb_helper *fb_helper) drm_warn_on_modeset_not_all_locked(dev); - list_for_each_entry(plane, &dev->mode_config.plane_list, head) + list_for_each_entry(plane, &dev->mode_config.plane_list, head) { if (plane->type != DRM_PLANE_TYPE_PRIMARY) drm_plane_force_disable(plane); + if (dev->mode_config.rotation_property) { + drm_object_property_set_value(&plane->base, + dev->mode_config.rotation_property, + BIT(DRM_ROTATE_0)); + } + } + for (i = 0; i < fb_helper->crtc_count; i++) { struct drm_mode_set *mode_set = &fb_helper->crtc_info[i].mode_set; struct drm_crtc *crtc = mode_set->crtc; -- GitLab From 0a56067469bde6662ce7c89a3d290171f878bac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 11 Jun 2014 16:51:18 +0300 Subject: [PATCH 0069/1868] drm/i915: Fill out the FWx watermark register defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add defines for all the watermark registers on modernish gmch platforms. 
VLV has increased the number of bits available for certain watermarks so expand the masks appropriately. Also vlv and chv have added some extra FW registers. Not sure what happened on chv because a new register called FW9 is now at the offset where FW7 was on vlv, while FW7 and FW8 (another new register) have been moved off somewhere else. Oh well, we'll just need two defines for FW7 then. v2: Fix DSPHOWM1 offset (Paulo) Reviewed-by: Paulo Zanoni Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 138 ++++++++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_pm.c | 11 ++- 2 files changed, 130 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5ebac620bbcd..a87eb18b4c90 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3865,28 +3865,136 @@ enum punit_power_well { #define DSPARB_BEND_SHIFT 9 /* on 855 */ #define DSPARB_AEND_SHIFT 0 +/* pnv/gen4/g4x/vlv/chv */ #define DSPFW1 (dev_priv->info.display_mmio_offset + 0x70034) -#define DSPFW_SR_SHIFT 23 -#define DSPFW_SR_MASK (0x1ff<<23) -#define DSPFW_CURSORB_SHIFT 16 -#define DSPFW_CURSORB_MASK (0x3f<<16) -#define DSPFW_PLANEB_SHIFT 8 -#define DSPFW_PLANEB_MASK (0x7f<<8) -#define DSPFW_PLANEA_MASK (0x7f) +#define DSPFW_SR_SHIFT 23 +#define DSPFW_SR_MASK (0x1ff<<23) +#define DSPFW_CURSORB_SHIFT 16 +#define DSPFW_CURSORB_MASK (0x3f<<16) +#define DSPFW_PLANEB_SHIFT 8 +#define DSPFW_PLANEB_MASK (0x7f<<8) +#define DSPFW_PLANEB_MASK_VLV (0xff<<8) /* vlv/chv */ +#define DSPFW_PLANEA_SHIFT 0 +#define DSPFW_PLANEA_MASK (0x7f<<0) +#define DSPFW_PLANEA_MASK_VLV (0xff<<0) /* vlv/chv */ #define DSPFW2 (dev_priv->info.display_mmio_offset + 0x70038) -#define DSPFW_CURSORA_MASK 0x00003f00 -#define DSPFW_CURSORA_SHIFT 8 -#define DSPFW_PLANEC_MASK (0x7f) +#define DSPFW_FBC_SR_EN (1<<31) /* g4x */ +#define DSPFW_FBC_SR_SHIFT 28 +#define DSPFW_FBC_SR_MASK (0x7<<28) /* g4x */ +#define
DSPFW_FBC_HPLL_SR_SHIFT 24 +#define DSPFW_FBC_HPLL_SR_MASK (0xf<<24) /* g4x */ +#define DSPFW_SPRITEB_SHIFT (16) +#define DSPFW_SPRITEB_MASK (0x7f<<16) /* g4x */ +#define DSPFW_SPRITEB_MASK_VLV (0xff<<16) /* vlv/chv */ +#define DSPFW_CURSORA_SHIFT 8 +#define DSPFW_CURSORA_MASK (0x3f<<8) +#define DSPFW_PLANEC_SHIFT_OLD 0 +#define DSPFW_PLANEC_MASK_OLD (0x7f<<0) /* pre-gen4 sprite C */ +#define DSPFW_SPRITEA_SHIFT 0 +#define DSPFW_SPRITEA_MASK (0x7f<<0) /* g4x */ +#define DSPFW_SPRITEA_MASK_VLV (0xff<<0) /* vlv/chv */ #define DSPFW3 (dev_priv->info.display_mmio_offset + 0x7003c) -#define DSPFW_HPLL_SR_EN (1<<31) -#define DSPFW_CURSOR_SR_SHIFT 24 +#define DSPFW_HPLL_SR_EN (1<<31) #define PINEVIEW_SELF_REFRESH_EN (1<<30) +#define DSPFW_CURSOR_SR_SHIFT 24 #define DSPFW_CURSOR_SR_MASK (0x3f<<24) #define DSPFW_HPLL_CURSOR_SHIFT 16 #define DSPFW_HPLL_CURSOR_MASK (0x3f<<16) -#define DSPFW_HPLL_SR_MASK (0x1ff) -#define DSPFW4 (dev_priv->info.display_mmio_offset + 0x70070) -#define DSPFW7 (dev_priv->info.display_mmio_offset + 0x7007c) +#define DSPFW_HPLL_SR_SHIFT 0 +#define DSPFW_HPLL_SR_MASK (0x1ff<<0) + +/* vlv/chv */ +#define DSPFW4 (VLV_DISPLAY_BASE + 0x70070) +#define DSPFW_SPRITEB_WM1_SHIFT 16 +#define DSPFW_SPRITEB_WM1_MASK (0xff<<16) +#define DSPFW_CURSORA_WM1_SHIFT 8 +#define DSPFW_CURSORA_WM1_MASK (0x3f<<8) +#define DSPFW_SPRITEA_WM1_SHIFT 0 +#define DSPFW_SPRITEA_WM1_MASK (0xff<<0) +#define DSPFW5 (VLV_DISPLAY_BASE + 0x70074) +#define DSPFW_PLANEB_WM1_SHIFT 24 +#define DSPFW_PLANEB_WM1_MASK (0xff<<24) +#define DSPFW_PLANEA_WM1_SHIFT 16 +#define DSPFW_PLANEA_WM1_MASK (0xff<<16) +#define DSPFW_CURSORB_WM1_SHIFT 8 +#define DSPFW_CURSORB_WM1_MASK (0x3f<<8) +#define DSPFW_CURSOR_SR_WM1_SHIFT 0 +#define DSPFW_CURSOR_SR_WM1_MASK (0x3f<<0) +#define DSPFW6 (VLV_DISPLAY_BASE + 0x70078) +#define DSPFW_SR_WM1_SHIFT 0 +#define DSPFW_SR_WM1_MASK (0x1ff<<0) +#define DSPFW7 (VLV_DISPLAY_BASE + 0x7007c) +#define DSPFW7_CHV (VLV_DISPLAY_BASE + 0x700b4) /* wtf #1? 
*/ +#define DSPFW_SPRITED_WM1_SHIFT 24 +#define DSPFW_SPRITED_WM1_MASK (0xff<<24) +#define DSPFW_SPRITED_SHIFT 16 +#define DSPFW_SPRITED_MASK (0xff<<16) +#define DSPFW_SPRITEC_WM1_SHIFT 8 +#define DSPFW_SPRITEC_WM1_MASK (0xff<<8) +#define DSPFW_SPRITEC_SHIFT 0 +#define DSPFW_SPRITEC_MASK (0xff<<0) +#define DSPFW8_CHV (VLV_DISPLAY_BASE + 0x700b8) +#define DSPFW_SPRITEF_WM1_SHIFT 24 +#define DSPFW_SPRITEF_WM1_MASK (0xff<<24) +#define DSPFW_SPRITEF_SHIFT 16 +#define DSPFW_SPRITEF_MASK (0xff<<16) +#define DSPFW_SPRITEE_WM1_SHIFT 8 +#define DSPFW_SPRITEE_WM1_MASK (0xff<<8) +#define DSPFW_SPRITEE_SHIFT 0 +#define DSPFW_SPRITEE_MASK (0xff<<0) +#define DSPFW9_CHV (VLV_DISPLAY_BASE + 0x7007c) /* wtf #2? */ +#define DSPFW_PLANEC_WM1_SHIFT 24 +#define DSPFW_PLANEC_WM1_MASK (0xff<<24) +#define DSPFW_PLANEC_SHIFT 16 +#define DSPFW_PLANEC_MASK (0xff<<16) +#define DSPFW_CURSORC_WM1_SHIFT 8 +#define DSPFW_CURSORC_WM1_MASK (0x3f<<8) +#define DSPFW_CURSORC_SHIFT 0 +#define DSPFW_CURSORC_MASK (0x3f<<0) + +/* vlv/chv high order bits */ +#define DSPHOWM (VLV_DISPLAY_BASE + 0x70064) +#define DSPFW_SR_HI_SHIFT 24 +#define DSPFW_SR_HI_MASK (1<<24) +#define DSPFW_SPRITEF_HI_SHIFT 23 +#define DSPFW_SPRITEF_HI_MASK (1<<23) +#define DSPFW_SPRITEE_HI_SHIFT 22 +#define DSPFW_SPRITEE_HI_MASK (1<<22) +#define DSPFW_PLANEC_HI_SHIFT 21 +#define DSPFW_PLANEC_HI_MASK (1<<21) +#define DSPFW_SPRITED_HI_SHIFT 20 +#define DSPFW_SPRITED_HI_MASK (1<<20) +#define DSPFW_SPRITEC_HI_SHIFT 16 +#define DSPFW_SPRITEC_HI_MASK (1<<16) +#define DSPFW_PLANEB_HI_SHIFT 12 +#define DSPFW_PLANEB_HI_MASK (1<<12) +#define DSPFW_SPRITEB_HI_SHIFT 8 +#define DSPFW_SPRITEB_HI_MASK (1<<8) +#define DSPFW_SPRITEA_HI_SHIFT 4 +#define DSPFW_SPRITEA_HI_MASK (1<<4) +#define DSPFW_PLANEA_HI_SHIFT 0 +#define DSPFW_PLANEA_HI_MASK (1<<0) +#define DSPHOWM1 (VLV_DISPLAY_BASE + 0x70068) +#define DSPFW_SR_WM1_HI_SHIFT 24 +#define DSPFW_SR_WM1_HI_MASK (1<<24) +#define DSPFW_SPRITEF_WM1_HI_SHIFT 23 +#define DSPFW_SPRITEF_WM1_HI_MASK (1<<23)
+#define DSPFW_SPRITEE_WM1_HI_SHIFT 22 +#define DSPFW_SPRITEE_WM1_HI_MASK (1<<22) +#define DSPFW_PLANEC_WM1_HI_SHIFT 21 +#define DSPFW_PLANEC_WM1_HI_MASK (1<<21) +#define DSPFW_SPRITED_WM1_HI_SHIFT 20 +#define DSPFW_SPRITED_WM1_HI_MASK (1<<20) +#define DSPFW_SPRITEC_WM1_HI_SHIFT 16 +#define DSPFW_SPRITEC_WM1_HI_MASK (1<<16) +#define DSPFW_PLANEB_WM1_HI_SHIFT 12 +#define DSPFW_PLANEB_WM1_HI_MASK (1<<12) +#define DSPFW_SPRITEB_WM1_HI_SHIFT 8 +#define DSPFW_SPRITEB_WM1_HI_MASK (1<<8) +#define DSPFW_SPRITEA_WM1_HI_SHIFT 4 +#define DSPFW_SPRITEA_WM1_HI_MASK (1<<4) +#define DSPFW_PLANEA_WM1_HI_SHIFT 0 +#define DSPFW_PLANEA_WM1_HI_MASK (1<<0) /* drain latency register values*/ #define DRAIN_LATENCY_PRECISION_32 32 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ab80df2909e0..0f9164d854de 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1400,7 +1400,7 @@ static void valleyview_update_wm(struct drm_crtc *crtc) (plane_sr << DSPFW_SR_SHIFT) | (cursorb_wm << DSPFW_CURSORB_SHIFT) | (planeb_wm << DSPFW_PLANEB_SHIFT) | - planea_wm); + (planea_wm << DSPFW_PLANEA_SHIFT)); I915_WRITE(DSPFW2, (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | (cursora_wm << DSPFW_CURSORA_SHIFT)); @@ -1457,7 +1457,7 @@ static void g4x_update_wm(struct drm_crtc *crtc) (plane_sr << DSPFW_SR_SHIFT) | (cursorb_wm << DSPFW_CURSORB_SHIFT) | (planeb_wm << DSPFW_PLANEB_SHIFT) | - planea_wm); + (planea_wm << DSPFW_PLANEA_SHIFT)); I915_WRITE(DSPFW2, (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | (cursora_wm << DSPFW_CURSORA_SHIFT)); @@ -1531,8 +1531,11 @@ static void i965_update_wm(struct drm_crtc *unused_crtc) /* 965 has limitations... 
*/ I915_WRITE(DSPFW1, (srwm << DSPFW_SR_SHIFT) | - (8 << 16) | (8 << 8) | (8 << 0)); - I915_WRITE(DSPFW2, (8 << 8) | (8 << 0)); + (8 << DSPFW_CURSORB_SHIFT) | + (8 << DSPFW_PLANEB_SHIFT) | + (8 << DSPFW_PLANEA_SHIFT)); + I915_WRITE(DSPFW2, (8 << DSPFW_CURSORA_SHIFT) | + (8 << DSPFW_PLANEC_SHIFT_OLD)); /* update cursor SR watermark */ I915_WRITE(DSPFW3, (cursor_sr << DSPFW_CURSOR_SR_SHIFT)); -- GitLab From 1abc4dc7e2ca7886b6b06bcf2ff1451702c83f9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 26 Jun 2014 17:02:37 +0300 Subject: [PATCH 0070/1868] drm/i915: Parametrize VLV_DDL registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VLV/CHV DDL registers are uniform, and neatly enough the register offsets are sane so we can easily unify them to a single set of defines and just pass the pipe as the parameter to compute the register offset. Note that we now fill out the drain latency for pipe C on CHV which we didn't do before. The rest of the pipe C watermarks are still untouched but that will be remedied later by adding a proper cherryview_update_wm() function. 
v2: Add a note about CHV pipe C changes (Paulo) Reviewed-by: Paulo Zanoni Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 54 ++++++++------------------------- drivers/gpu/drm/i915/intel_pm.c | 52 ++++++++++++++----------------- 2 files changed, 36 insertions(+), 70 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a87eb18b4c90..fbdb08f4cd76 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3999,47 +3999,19 @@ enum punit_power_well { /* drain latency register values*/ #define DRAIN_LATENCY_PRECISION_32 32 #define DRAIN_LATENCY_PRECISION_64 64 -#define VLV_DDL1 (VLV_DISPLAY_BASE + 0x70050) -#define DDL_CURSORA_PRECISION_64 (1<<31) -#define DDL_CURSORA_PRECISION_32 (0<<31) -#define DDL_CURSORA_SHIFT 24 -#define DDL_SPRITEB_PRECISION_64 (1<<23) -#define DDL_SPRITEB_PRECISION_32 (0<<23) -#define DDL_SPRITEB_SHIFT 16 -#define DDL_SPRITEA_PRECISION_64 (1<<15) -#define DDL_SPRITEA_PRECISION_32 (0<<15) -#define DDL_SPRITEA_SHIFT 8 -#define DDL_PLANEA_PRECISION_64 (1<<7) -#define DDL_PLANEA_PRECISION_32 (0<<7) -#define DDL_PLANEA_SHIFT 0 - -#define VLV_DDL2 (VLV_DISPLAY_BASE + 0x70054) -#define DDL_CURSORB_PRECISION_64 (1<<31) -#define DDL_CURSORB_PRECISION_32 (0<<31) -#define DDL_CURSORB_SHIFT 24 -#define DDL_SPRITED_PRECISION_64 (1<<23) -#define DDL_SPRITED_PRECISION_32 (0<<23) -#define DDL_SPRITED_SHIFT 16 -#define DDL_SPRITEC_PRECISION_64 (1<<15) -#define DDL_SPRITEC_PRECISION_32 (0<<15) -#define DDL_SPRITEC_SHIFT 8 -#define DDL_PLANEB_PRECISION_64 (1<<7) -#define DDL_PLANEB_PRECISION_32 (0<<7) -#define DDL_PLANEB_SHIFT 0 - -#define VLV_DDL3 (VLV_DISPLAY_BASE + 0x70058) -#define DDL_CURSORC_PRECISION_64 (1<<31) -#define DDL_CURSORC_PRECISION_32 (0<<31) -#define DDL_CURSORC_SHIFT 24 -#define DDL_SPRITEF_PRECISION_64 (1<<23) -#define DDL_SPRITEF_PRECISION_32 (0<<23) -#define DDL_SPRITEF_SHIFT 16 -#define DDL_SPRITEE_PRECISION_64 (1<<15) 
-#define DDL_SPRITEE_PRECISION_32 (0<<15) -#define DDL_SPRITEE_SHIFT 8 -#define DDL_PLANEC_PRECISION_64 (1<<7) -#define DDL_PLANEC_PRECISION_32 (0<<7) -#define DDL_PLANEC_SHIFT 0 +#define VLV_DDL(pipe) (VLV_DISPLAY_BASE + 0x70050 + 4 * (pipe)) +#define DDL_CURSOR_PRECISION_64 (1<<31) +#define DDL_CURSOR_PRECISION_32 (0<<31) +#define DDL_CURSOR_SHIFT 24 +#define DDL_SPRITE1_PRECISION_64 (1<<23) +#define DDL_SPRITE1_PRECISION_32 (0<<23) +#define DDL_SPRITE1_SHIFT 16 +#define DDL_SPRITE0_PRECISION_64 (1<<15) +#define DDL_SPRITE0_PRECISION_32 (0<<15) +#define DDL_SPRITE0_SHIFT 8 +#define DDL_PLANE_PRECISION_64 (1<<7) +#define DDL_PLANE_PRECISION_32 (0<<7) +#define DDL_PLANE_SHIFT 0 /* FIFO watermark sizes etc */ #define G4X_FIFO_LINE_SIZE 64 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0f9164d854de..2573c6967559 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1313,35 +1313,29 @@ static bool vlv_compute_drain_latency(struct drm_device *dev, static void vlv_update_drain_latency(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int planea_prec, planea_dl, planeb_prec, planeb_dl; - int cursora_prec, cursora_dl, cursorb_prec, cursorb_dl; - int plane_prec_mult, cursor_prec_mult; /* Precision multiplier is - either 16 or 32 */ - - /* For plane A, Cursor A */ - if (vlv_compute_drain_latency(dev, 0, &plane_prec_mult, &planea_dl, - &cursor_prec_mult, &cursora_dl)) { - cursora_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ? - DDL_CURSORA_PRECISION_32 : DDL_CURSORA_PRECISION_64; - planea_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ? 
- DDL_PLANEA_PRECISION_32 : DDL_PLANEA_PRECISION_64; - - I915_WRITE(VLV_DDL1, cursora_prec | - (cursora_dl << DDL_CURSORA_SHIFT) | - planea_prec | planea_dl); - } - - /* For plane B, Cursor B */ - if (vlv_compute_drain_latency(dev, 1, &plane_prec_mult, &planeb_dl, - &cursor_prec_mult, &cursorb_dl)) { - cursorb_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ? - DDL_CURSORB_PRECISION_32 : DDL_CURSORB_PRECISION_64; - planeb_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ? - DDL_PLANEB_PRECISION_32 : DDL_PLANEB_PRECISION_64; - - I915_WRITE(VLV_DDL2, cursorb_prec | - (cursorb_dl << DDL_CURSORB_SHIFT) | - planeb_prec | planeb_dl); + enum pipe pipe; + + for_each_pipe(pipe) { + int plane_prec, plane_dl; + int cursor_prec, cursor_dl; + int plane_prec_mult, cursor_prec_mult; + + if (!vlv_compute_drain_latency(dev, pipe, &plane_prec_mult, &plane_dl, + &cursor_prec_mult, &cursor_dl)) + continue; + + /* + * FIXME CHV spec still lists 16 and 32 as the precision + * values. Need to figure out if spec is outdated or what. + */ + cursor_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_64) ? + DDL_CURSOR_PRECISION_64 : DDL_CURSOR_PRECISION_32; + plane_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_64) ? + DDL_PLANE_PRECISION_64 : DDL_PLANE_PRECISION_32; + + I915_WRITE(VLV_DDL(pipe), cursor_prec | + (cursor_dl << DDL_CURSOR_SHIFT) | + plane_prec | (plane_dl << DDL_PLANE_SHIFT)); } } -- GitLab From 41aad816d7061f6cc3d92f39fc655f034bbfb1c0 Mon Sep 17 00:00:00 2001 From: Gajanan Bhat Date: Wed, 16 Jul 2014 18:24:03 +0530 Subject: [PATCH 0071/1868] drm/i915: Update DDL only for current CRTC Instead of looping through all CRTCs, update DDL for current CRTC for which watermark is being updated. CHV is confirmed to have precision of 32/64 which is same as VLV. 
Reviewed-by: Imre Deak Signed-off-by: Gajanan Bhat Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2573c6967559..0feeae845f12 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1310,24 +1310,17 @@ static bool vlv_compute_drain_latency(struct drm_device *dev, * latency value. */ -static void vlv_update_drain_latency(struct drm_device *dev) +static void vlv_update_drain_latency(struct drm_crtc *crtc) { + struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; - enum pipe pipe; - - for_each_pipe(pipe) { - int plane_prec, plane_dl; - int cursor_prec, cursor_dl; - int plane_prec_mult, cursor_prec_mult; + enum pipe pipe = to_intel_crtc(crtc)->pipe; + int plane_prec, plane_dl; + int cursor_prec, cursor_dl; + int plane_prec_mult, cursor_prec_mult; - if (!vlv_compute_drain_latency(dev, pipe, &plane_prec_mult, &plane_dl, - &cursor_prec_mult, &cursor_dl)) - continue; - - /* - * FIXME CHV spec still lists 16 and 32 as the precision - * values. Need to figure out if spec is outdated or what. - */ + if (vlv_compute_drain_latency(dev, pipe, &plane_prec_mult, &plane_dl, + &cursor_prec_mult, &cursor_dl)) { cursor_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_64) ? DDL_CURSOR_PRECISION_64 : DDL_CURSOR_PRECISION_32; plane_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_64) ? 
@@ -1352,7 +1345,7 @@ static void valleyview_update_wm(struct drm_crtc *crtc) unsigned int enabled = 0; bool cxsr_enabled; - vlv_update_drain_latency(dev); + vlv_update_drain_latency(crtc); if (g4x_compute_wm0(dev, PIPE_A, &valleyview_wm_info, latency_ns, -- GitLab From 3c2777fd2faa5d1c1d5867baa086f2fd8b05479e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 26 Jun 2014 17:03:06 +0300 Subject: [PATCH 0072/1868] drm/i915: Add cherryview_update_wm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CHV has a third pipe so we need to compute the watermarks for its planes. Add cherryview_update_wm() to do just that. v2: Rebase on top of Imre's cxsr changes v3: Pass crtc to vlv_update_drain_latency() Reviewed-by: Paulo Zanoni Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 81 ++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0feeae845f12..da43e30ad60b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1399,6 +1399,85 @@ static void valleyview_update_wm(struct drm_crtc *crtc) intel_set_memory_cxsr(dev_priv, true); } +static void cherryview_update_wm(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + static const int sr_latency_ns = 12000; + struct drm_i915_private *dev_priv = dev->dev_private; + int planea_wm, planeb_wm, planec_wm; + int cursora_wm, cursorb_wm, cursorc_wm; + int plane_sr, cursor_sr; + int ignore_plane_sr, ignore_cursor_sr; + unsigned int enabled = 0; + bool cxsr_enabled; + + vlv_update_drain_latency(crtc); + + if (g4x_compute_wm0(dev, PIPE_A, + &valleyview_wm_info, latency_ns, + &valleyview_cursor_wm_info, latency_ns, + &planea_wm, &cursora_wm)) + enabled |= 1 << PIPE_A; + + if (g4x_compute_wm0(dev, PIPE_B, + &valleyview_wm_info, latency_ns, + &valleyview_cursor_wm_info, 
latency_ns, + &planeb_wm, &cursorb_wm)) + enabled |= 1 << PIPE_B; + + if (g4x_compute_wm0(dev, PIPE_C, + &valleyview_wm_info, latency_ns, + &valleyview_cursor_wm_info, latency_ns, + &planec_wm, &cursorc_wm)) + enabled |= 1 << PIPE_C; + + if (single_plane_enabled(enabled) && + g4x_compute_srwm(dev, ffs(enabled) - 1, + sr_latency_ns, + &valleyview_wm_info, + &valleyview_cursor_wm_info, + &plane_sr, &ignore_cursor_sr) && + g4x_compute_srwm(dev, ffs(enabled) - 1, + 2*sr_latency_ns, + &valleyview_wm_info, + &valleyview_cursor_wm_info, + &ignore_plane_sr, &cursor_sr)) { + cxsr_enabled = true; + } else { + cxsr_enabled = false; + intel_set_memory_cxsr(dev_priv, false); + plane_sr = cursor_sr = 0; + } + + DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " + "B: plane=%d, cursor=%d, C: plane=%d, cursor=%d, " + "SR: plane=%d, cursor=%d\n", + planea_wm, cursora_wm, + planeb_wm, cursorb_wm, + planec_wm, cursorc_wm, + plane_sr, cursor_sr); + + I915_WRITE(DSPFW1, + (plane_sr << DSPFW_SR_SHIFT) | + (cursorb_wm << DSPFW_CURSORB_SHIFT) | + (planeb_wm << DSPFW_PLANEB_SHIFT) | + (planea_wm << DSPFW_PLANEA_SHIFT)); + I915_WRITE(DSPFW2, + (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | + (cursora_wm << DSPFW_CURSORA_SHIFT)); + I915_WRITE(DSPFW3, + (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) | + (cursor_sr << DSPFW_CURSOR_SR_SHIFT)); + I915_WRITE(DSPFW9_CHV, + (I915_READ(DSPFW9_CHV) & ~(DSPFW_PLANEC_MASK | + DSPFW_CURSORC_MASK)) | + (planec_wm << DSPFW_PLANEC_SHIFT) | + (cursorc_wm << DSPFW_CURSORC_SHIFT)); + + if (cxsr_enabled) + intel_set_memory_cxsr(dev_priv, true); +} + static void g4x_update_wm(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -7119,7 +7198,7 @@ void intel_init_pm(struct drm_device *dev) else if (INTEL_INFO(dev)->gen == 8) dev_priv->display.init_clock_gating = gen8_init_clock_gating; } else if (IS_CHERRYVIEW(dev)) { - dev_priv->display.update_wm = valleyview_update_wm; + dev_priv->display.update_wm = cherryview_update_wm; 
dev_priv->display.init_clock_gating = cherryview_init_clock_gating; } else if (IS_VALLEYVIEW(dev)) { -- GitLab From 3dd7b97458e8aa2d8985b46622d226fa635071e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Jun 2014 19:49:57 +0300 Subject: [PATCH 0073/1868] drm/i915: Hack to tie both common lanes together on chv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It looks like frobbing the cmnreset line on one PHY disturbs the other PHY on chv. The result is a black screen. On HDMI it's just a flash of black, but DP usually falls over and can't get back up. As a workaround set up the power domains so that both common lane wells power up and down together. I also tried leaving the cmnreset deasserted even if the power well goes down but that didn't seem acceptable to the PHY. Reviewed-by: Rafael Barbalho Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index da43e30ad60b..aaae22815965 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6877,13 +6877,23 @@ static struct i915_power_well chv_power_wells[] = { #endif { .name = "dpio-common-bc", - .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS, + /* + * XXX: cmnreset for one PHY seems to disturb the other. + * As a workaround keep both powered on at the same + * time for now. + */ + .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS, .data = PUNIT_POWER_WELL_DPIO_CMN_BC, .ops = &chv_dpio_cmn_power_well_ops, }, { .name = "dpio-common-d", - .domains = CHV_DPIO_CMN_D_POWER_DOMAINS, + /* + * XXX: cmnreset for one PHY seems to disturb the other. + * As a workaround keep both powered on at the same + * time for now.
+ */ + .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS, .data = PUNIT_POWER_WELL_DPIO_CMN_D, .ops = &chv_dpio_cmn_power_well_ops, }, -- GitLab From efd814b73cfcf4ed1cb561561d8df6daccb3ceb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Jun 2014 19:52:13 +0300 Subject: [PATCH 0074/1868] drm/i915: Polish the chv cmnlane resrt macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the semi-funky cmnlane assert/deassert macros with something a bit more conventional. Also protect the macro arguments properly (also for PHY_POWERGOOD()). Reviewed-by: Rafael Barbalho Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 7 ++----- drivers/gpu/drm/i915/intel_pm.c | 8 ++++---- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fbdb08f4cd76..468ef09d698d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1697,12 +1697,9 @@ enum punit_power_well { #define DPIO_PHY_STATUS (VLV_DISPLAY_BASE + 0x6240) #define DPLL_PORTD_READY_MASK (0xf) #define DISPLAY_PHY_CONTROL (VLV_DISPLAY_BASE + 0x60100) -#define PHY_COM_LANE_RESET_DEASSERT(phy, val) \ - ((phy == DPIO_PHY0) ? (val | 1) : (val | 2)) -#define PHY_COM_LANE_RESET_ASSERT(phy, val) \ - ((phy == DPIO_PHY0) ? (val & ~1) : (val & ~2)) +#define PHY_COM_LANE_RESET_DEASSERT(phy) (1 << (phy)) #define DISPLAY_PHY_STATUS (VLV_DISPLAY_BASE + 0x60104) -#define PHY_POWERGOOD(phy) ((phy == DPIO_PHY0) ? (1<<31) : (1<<30)) +#define PHY_POWERGOOD(phy) (((phy) == DPIO_PHY0) ? 
(1<<31) : (1<<30)) /* * The i830 generation, in LVDS mode, defines P1 as the bit number set within diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index aaae22815965..42bb329b2d05 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6359,8 +6359,8 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, if (wait_for(I915_READ(DISPLAY_PHY_STATUS) & PHY_POWERGOOD(phy), 1)) DRM_ERROR("Display PHY %d is not power up\n", phy); - I915_WRITE(DISPLAY_PHY_CONTROL, - PHY_COM_LANE_RESET_DEASSERT(phy, I915_READ(DISPLAY_PHY_CONTROL))); + I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) | + PHY_COM_LANE_RESET_DEASSERT(phy)); } static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, @@ -6380,8 +6380,8 @@ static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, assert_pll_disabled(dev_priv, PIPE_C); } - I915_WRITE(DISPLAY_PHY_CONTROL, - PHY_COM_LANE_RESET_ASSERT(phy, I915_READ(DISPLAY_PHY_CONTROL))); + I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) & + ~PHY_COM_LANE_RESET_DEASSERT(phy)); vlv_set_power_well(dev_priv, power_well, false); } -- GitLab From 692ef70c016b5035ad1942ccc2bc4040aa290044 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 5 Aug 2014 07:51:18 -0700 Subject: [PATCH 0075/1868] drm/i915: clean up PPGTT checking logic sanitize_enable_ppgtt is the function that checks all the conditions, honoring a forced ppgtt status or doing auto-detect as necessary. Just make sure it returns the right value in all cases and use that in the macros instead of the confusing intel_enable_ppgtt() function. Signed-off-by: Jesse Barnes Signed-off-by: Rodrigo Vivi [danvet: Don't reenable full ppgtt through the backdoor.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 11 ----------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 73d2308e2377..125a83c70768 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2065,8 +2065,8 @@ struct drm_i915_cmd_table { #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_PPGTT(dev) (INTEL_INFO(dev)->gen >= 7 && !IS_GEN8(dev)) -#define USES_PPGTT(dev) intel_enable_ppgtt(dev, false) -#define USES_FULL_PPGTT(dev) intel_enable_ppgtt(dev, true) +#define USES_PPGTT(dev) (i915.enable_ppgtt) +#define USES_FULL_PPGTT(dev) (i915.enable_ppgtt == 2) #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1411613f2174..b4b7cfd226b7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -33,17 +33,6 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv); static void chv_setup_private_ppat(struct drm_i915_private *dev_priv); -bool intel_enable_ppgtt(struct drm_device *dev, bool full) -{ - if (i915.enable_ppgtt == 0) - return false; - - if (i915.enable_ppgtt == 1 && full) - return false; - - return true; -} - static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt) { if (enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev)) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 8d6f7c18c404..666c938a51e3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -272,7 +272,6 @@ void i915_gem_init_global_gtt(struct drm_device *dev); void 
i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, unsigned long mappable_end, unsigned long end); -bool intel_enable_ppgtt(struct drm_device *dev, bool full); int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); void i915_check_and_clear_faults(struct drm_device *dev); -- GitLab From e2fcdaa9c951c51d558fea2cc020d89b382d702e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:02:51 +0300 Subject: [PATCH 0076/1868] drm/i915: Free pending page flip events at .preclose() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there are pending page flips when the fd gets closed those page flips may have events associated to them. When the page flip eventually completes it will queue the event to file_priv->event_list, but that may be too late and file_priv->event_list has already been cleaned up. Thus we leak a bit of kernel memory in the form of the event structure. To avoid such problems clear out such pending events from intel_crtc->unpin_work at ->preclose(). Any event that already made it to file_priv->event_list will get cleaned up by the drm_release_events() a bit later. We can ignore the file_priv->event_space accounting since file_priv is going away. This is already how drm core deals with pending vblank events, which are maintained by the drm core. What saves us from a total disaster (ie. dereferencing an already freed file_priv) is the fact that the fb destruction triggers a modeset and there we wait for pending flips.
Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/intel_display.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/i915/intel_drv.h | 1 + 3 files changed, 26 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 2e7f03ad5ee2..c965698a8bac 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1981,6 +1981,9 @@ void i915_driver_preclose(struct drm_device *dev, struct drm_file *file) i915_gem_context_close(dev, file); i915_gem_release(dev, file); mutex_unlock(&dev->struct_mutex); + + if (drm_core_check_feature(dev, DRIVER_MODESET)) + intel_modeset_preclose(dev, file); } void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f32a94544940..24295694e493 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13470,3 +13470,25 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m, err_printf(m, " VSYNC: %08x\n", error->transcoder[i].vsync); } } + +void intel_modeset_preclose(struct drm_device *dev, struct drm_file *file) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(dev, crtc) { + struct intel_unpin_work *work; + unsigned long irqflags; + + spin_lock_irqsave(&dev->event_lock, irqflags); + + work = crtc->unpin_work; + + if (work && work->event && + work->event->base.file_priv == file) { + kfree(work->event); + work->event = NULL; + } + + spin_unlock_irqrestore(&dev->event_lock, irqflags); + } +} diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 79782094c8f9..666ca8a044ea 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -892,6 +892,7 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_config *pipe_config); int intel_format_to_fourcc(int format); 
void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc); +void intel_modeset_preclose(struct drm_device *dev, struct drm_file *file); /* intel_dp.c */ void intel_dp_init(struct drm_device *dev, int output_reg, enum port port); -- GitLab From 0948c2651413d56c90d7ee9c99d75bef82d4c351 Mon Sep 17 00:00:00 2001 From: Gajanan Bhat Date: Thu, 7 Aug 2014 01:58:24 +0530 Subject: [PATCH 0077/1868] drm/i915: Generalize drain latency computation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify drain latency computation to use it for any plane. Same function can be used for primary, cursor and sprite planes. v2: Adressed review comments by Imre and Ville. - Moved clock round up in separate patch - Added WARN check for clock and pixel size - Simplified bit masking - Use cursor_base instead of reg read v3: Changed to bitwise shorthand operator for plane_dl assignment. Signed-off-by: Gajanan Bhat Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 87 +++++++++++++++++++-------------- 2 files changed, 51 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 468ef09d698d..d0cff5630569 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4009,6 +4009,7 @@ enum punit_power_well { #define DDL_PLANE_PRECISION_64 (1<<7) #define DDL_PLANE_PRECISION_32 (0<<7) #define DDL_PLANE_SHIFT 0 +#define DRAIN_LATENCY_MASK 0x7f /* FIFO watermark sizes etc */ #define G4X_FIFO_LINE_SIZE 64 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 42bb329b2d05..de27439636e8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1271,33 +1271,24 @@ static bool g4x_compute_srwm(struct drm_device *dev, display, cursor); } -static bool vlv_compute_drain_latency(struct drm_device *dev, - int plane, - int *plane_prec_mult, - 
int *plane_dl, - int *cursor_prec_mult, - int *cursor_dl) +static bool vlv_compute_drain_latency(struct drm_crtc *crtc, + int pixel_size, + int *prec_mult, + int *drain_latency) { - struct drm_crtc *crtc; - int clock, pixel_size; int entries; + int clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock; - crtc = intel_get_crtc_for_plane(dev, plane); - if (!intel_crtc_active(crtc)) + if (WARN(clock == 0, "Pixel clock is zero!\n")) return false; - clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock; - pixel_size = crtc->primary->fb->bits_per_pixel / 8; /* BPP */ + if (WARN(pixel_size == 0, "Pixel size is zero!\n")) + return false; entries = (clock / 1000) * pixel_size; - *plane_prec_mult = (entries > 128) ? - DRAIN_LATENCY_PRECISION_64 : DRAIN_LATENCY_PRECISION_32; - *plane_dl = (64 * (*plane_prec_mult) * 4) / entries; - - entries = (clock / 1000) * 4; /* BPP is always 4 for cursor */ - *cursor_prec_mult = (entries > 128) ? - DRAIN_LATENCY_PRECISION_64 : DRAIN_LATENCY_PRECISION_32; - *cursor_dl = (64 * (*cursor_prec_mult) * 4) / entries; + *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 : + DRAIN_LATENCY_PRECISION_32; + *drain_latency = (64 * (*prec_mult) * 4) / entries; return true; } @@ -1312,24 +1303,46 @@ static bool vlv_compute_drain_latency(struct drm_device *dev, static void vlv_update_drain_latency(struct drm_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - enum pipe pipe = to_intel_crtc(crtc)->pipe; - int plane_prec, plane_dl; - int cursor_prec, cursor_dl; - int plane_prec_mult, cursor_prec_mult; - - if (vlv_compute_drain_latency(dev, pipe, &plane_prec_mult, &plane_dl, - &cursor_prec_mult, &cursor_dl)) { - cursor_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_64) ? - DDL_CURSOR_PRECISION_64 : DDL_CURSOR_PRECISION_32; - plane_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_64) ? 
- DDL_PLANE_PRECISION_64 : DDL_PLANE_PRECISION_32; - - I915_WRITE(VLV_DDL(pipe), cursor_prec | - (cursor_dl << DDL_CURSOR_SHIFT) | - plane_prec | (plane_dl << DDL_PLANE_SHIFT)); + struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pixel_size; + int drain_latency; + enum pipe pipe = intel_crtc->pipe; + int plane_prec, prec_mult, plane_dl; + + plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_64 | + DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_64 | + (DRAIN_LATENCY_MASK << DDL_CURSOR_SHIFT)); + + if (!intel_crtc_active(crtc)) { + I915_WRITE(VLV_DDL(pipe), plane_dl); + return; + } + + /* Primary plane Drain Latency */ + pixel_size = crtc->primary->fb->bits_per_pixel / 8; /* BPP */ + if (vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) { + plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ? + DDL_PLANE_PRECISION_64 : + DDL_PLANE_PRECISION_32; + plane_dl |= plane_prec | drain_latency; } + + /* Cursor Drain Latency + * BPP is always 4 for cursor + */ + pixel_size = 4; + + /* Program cursor DL only if it is enabled */ + if (intel_crtc->cursor_base && + vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) { + plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ? + DDL_CURSOR_PRECISION_64 : + DDL_CURSOR_PRECISION_32; + plane_dl |= plane_prec | (drain_latency << DDL_CURSOR_SHIFT); + } + + I915_WRITE(VLV_DDL(pipe), plane_dl); } #define single_plane_enabled(mask) is_power_of_2(mask) -- GitLab From a398e9c79eea74ba2f3f24ac08902661682f008c Mon Sep 17 00:00:00 2001 From: Gajanan Bhat Date: Tue, 5 Aug 2014 23:15:54 +0530 Subject: [PATCH 0078/1868] drm/i915: Round-up clock and limit drain latency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round up clock computation and limit drain latency to maximum of 0x7F. 
Signed-off-by: Gajanan Bhat Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index de27439636e8..19bd7212f4a2 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1285,11 +1285,14 @@ static bool vlv_compute_drain_latency(struct drm_crtc *crtc, if (WARN(pixel_size == 0, "Pixel size is zero!\n")) return false; - entries = (clock / 1000) * pixel_size; + entries = DIV_ROUND_UP(clock, 1000) * pixel_size; *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 : DRAIN_LATENCY_PRECISION_32; *drain_latency = (64 * (*prec_mult) * 4) / entries; + if (*drain_latency > DRAIN_LATENCY_MASK) + *drain_latency = DRAIN_LATENCY_MASK; + return true; } -- GitLab From 01e184cc85d4516cab0ecea7c2c43a2dd3ad432b Mon Sep 17 00:00:00 2001 From: Gajanan Bhat Date: Thu, 7 Aug 2014 17:03:30 +0530 Subject: [PATCH 0079/1868] drm/i915: Add sprite watermark programming for VLV and CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Program DDL register as part of sprite watermark programming for CHV and VLV. 
v2: Rename DRAIN_LATENCY_MAX by DRAIN_LATENCY_MASK v3: Addressed review comments by Ville - Changed Sprite DDL definitions to more generic to avoid multiple if-else - Changed bit masking to customary form - Changed to bitwise shorthand operator for sprite_dl assignment Signed-off-by: Gajanan Bhat Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 9 +++------ drivers/gpu/drm/i915/intel_pm.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d0cff5630569..7a6cc69cdc2b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4000,12 +4000,9 @@ enum punit_power_well { #define DDL_CURSOR_PRECISION_64 (1<<31) #define DDL_CURSOR_PRECISION_32 (0<<31) #define DDL_CURSOR_SHIFT 24 -#define DDL_SPRITE1_PRECISION_64 (1<<23) -#define DDL_SPRITE1_PRECISION_32 (0<<23) -#define DDL_SPRITE1_SHIFT 16 -#define DDL_SPRITE0_PRECISION_64 (1<<15) -#define DDL_SPRITE0_PRECISION_32 (0<<15) -#define DDL_SPRITE0_SHIFT 8 +#define DDL_SPRITE_PRECISION_64(sprite) (1<<(15+8*(sprite))) +#define DDL_SPRITE_PRECISION_32(sprite) (0<<(15+8*(sprite))) +#define DDL_SPRITE_SHIFT(sprite) (8+8*(sprite)) #define DDL_PLANE_PRECISION_64 (1<<7) #define DDL_PLANE_PRECISION_32 (0<<7) #define DDL_PLANE_SHIFT 0 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 19bd7212f4a2..41de760bf1d4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1494,6 +1494,37 @@ static void cherryview_update_wm(struct drm_crtc *crtc) intel_set_memory_cxsr(dev_priv, true); } +static void valleyview_update_sprite_wm(struct drm_plane *plane, + struct drm_crtc *crtc, + uint32_t sprite_width, + uint32_t sprite_height, + int pixel_size, + bool enabled, bool scaled) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe = 
to_intel_plane(plane)->pipe; + int sprite = to_intel_plane(plane)->plane; + int drain_latency; + int plane_prec; + int sprite_dl; + int prec_mult; + + sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_64(sprite) | + (DRAIN_LATENCY_MASK << DDL_SPRITE_SHIFT(sprite))); + + if (enabled && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, + &drain_latency)) { + plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ? + DDL_SPRITE_PRECISION_64(sprite) : + DDL_SPRITE_PRECISION_32(sprite); + sprite_dl |= plane_prec | + (drain_latency << DDL_SPRITE_SHIFT(sprite)); + } + + I915_WRITE(VLV_DDL(pipe), sprite_dl); +} + static void g4x_update_wm(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -7225,10 +7256,12 @@ void intel_init_pm(struct drm_device *dev) dev_priv->display.init_clock_gating = gen8_init_clock_gating; } else if (IS_CHERRYVIEW(dev)) { dev_priv->display.update_wm = cherryview_update_wm; + dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm; dev_priv->display.init_clock_gating = cherryview_init_clock_gating; } else if (IS_VALLEYVIEW(dev)) { dev_priv->display.update_wm = valleyview_update_wm; + dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm; dev_priv->display.init_clock_gating = valleyview_init_clock_gating; } else if (IS_PINEVIEW(dev)) { -- GitLab From 403bdd10c815029694046adf5ffde0577cbd2866 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 7 Aug 2014 16:05:39 +0200 Subject: [PATCH 0080/1868] drm/i915: No busy-loop wait_for in the ring init code Doing a 1s wait (tops) with the cpu is a bit excessive. Tune it down like everything else in that code. v2: Also insert the missing space Chris spotted. 
Cc: Naresh Kumar Kachhi Cc: Chris Wilson Acked-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 16371a444426..117543e58d48 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -476,8 +476,8 @@ static bool stop_ring(struct intel_engine_cs *ring) if (!IS_GEN2(ring->dev)) { I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); - if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { - DRM_ERROR("%s :timed out trying to stop ring\n", ring->name); + if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { + DRM_ERROR("%s : timed out trying to stop ring\n", ring->name); return false; } } -- GitLab From 10f637bf292ba501f9b9e9df6dfe21d8fa521fbd Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 29 Jul 2014 13:47:11 +0200 Subject: [PATCH 0081/1868] drm: Add drm_plane/connector_index In the atomic state we'll have an array of states for crtcs, planes and connectors and need to be able to get at them by their index. We already have a drm_crtc_index function so add the missing ones for planes and connectors. If it later on turns out that the list walking is too expensive we can add the index to the relevant modeset objects. Rob Clark doesn't like the loops too much, but we can always add an obj->idx parameter later on. And for now reiterating is actually safer since nowadays we have hotpluggable connectors (thanks to DP MST). v2: Fix embarrassing copypasta fail in kerneldoc and header declarations, spotted by Matt Roper.
Cc: Matt Roper Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 46 ++++++++++++++++++++++++++++++++++++++ include/drm/drm_crtc.h | 2 ++ 2 files changed, 48 insertions(+) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 66d3bfb8d264..f3ef461deeb8 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1021,6 +1021,29 @@ void drm_connector_cleanup(struct drm_connector *connector) } EXPORT_SYMBOL(drm_connector_cleanup); +/** + * drm_connector_index - find the index of a registered connector + * @connector: connector to find index for + * + * Given a registered connector, return the index of that connector within a DRM + * device's list of connectors. + */ +unsigned int drm_connector_index(struct drm_connector *connector) +{ + unsigned int index = 0; + struct drm_connector *tmp; + + list_for_each_entry(tmp, &connector->dev->mode_config.connector_list, head) { + if (tmp == connector) + return index; + + index++; + } + + BUG(); +} +EXPORT_SYMBOL(drm_connector_index); + /** * drm_connector_register - register a connector * @connector: the connector to register @@ -1325,6 +1348,29 @@ void drm_plane_cleanup(struct drm_plane *plane) } EXPORT_SYMBOL(drm_plane_cleanup); +/** + * drm_plane_index - find the index of a registered plane + * @plane: plane to find index for + * + * Given a registered plane, return the index of that plane within a DRM + * device's list of planes.
+ */ +unsigned int drm_plane_index(struct drm_plane *plane) +{ + unsigned int index = 0; + struct drm_plane *tmp; + + list_for_each_entry(tmp, &plane->dev->mode_config.plane_list, head) { + if (tmp == plane) + return index; + + index++; + } + + BUG(); +} +EXPORT_SYMBOL(drm_plane_index); + /** * drm_plane_force_disable - Forcibly disable a plane * @plane: plane to disable diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index c530b4920a09..9f18e7022ab3 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -904,6 +904,7 @@ int drm_connector_register(struct drm_connector *connector); void drm_connector_unregister(struct drm_connector *connector); extern void drm_connector_cleanup(struct drm_connector *connector); +extern unsigned int drm_connector_index(struct drm_connector *connector); /* helper to unplug all connectors from sysfs for device */ extern void drm_connector_unplug_all(struct drm_device *dev); @@ -943,6 +944,7 @@ extern int drm_plane_init(struct drm_device *dev, const uint32_t *formats, uint32_t format_count, bool is_primary); extern void drm_plane_cleanup(struct drm_plane *plane); +extern unsigned int drm_plane_index(struct drm_plane *plane); extern void drm_plane_force_disable(struct drm_plane *plane); extern int drm_crtc_check_viewport(const struct drm_crtc *crtc, int x, int y, -- GitLab From a6a8bb848d5ca40bc0eb708ddeb23df2b0eca1fb Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 25 Jul 2014 17:47:18 +0200 Subject: [PATCH 0082/1868] drm: Move modeset_lock_all helpers to drm_modeset_lock.[hc] Somehow we've forgotten about this little bit of OCD. 
Reviewed-by: Dave Airlie Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 95 ------------------------------ drivers/gpu/drm/drm_modeset_lock.c | 95 ++++++++++++++++++++++++++++++ include/drm/drm_crtc.h | 4 -- include/drm/drm_modeset_lock.h | 5 ++ 4 files changed, 100 insertions(+), 99 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index f3ef461deeb8..caaa01f3b353 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -45,101 +45,6 @@ static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev, struct drm_mode_fb_cmd2 *r, struct drm_file *file_priv); -/** - * drm_modeset_lock_all - take all modeset locks - * @dev: drm device - * - * This function takes all modeset locks, suitable where a more fine-grained - * scheme isn't (yet) implemented. Locks must be dropped with - * drm_modeset_unlock_all. - */ -void drm_modeset_lock_all(struct drm_device *dev) -{ - struct drm_mode_config *config = &dev->mode_config; - struct drm_modeset_acquire_ctx *ctx; - int ret; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (WARN_ON(!ctx)) - return; - - mutex_lock(&config->mutex); - - drm_modeset_acquire_init(ctx, 0); - -retry: - ret = drm_modeset_lock(&config->connection_mutex, ctx); - if (ret) - goto fail; - ret = drm_modeset_lock_all_crtcs(dev, ctx); - if (ret) - goto fail; - - WARN_ON(config->acquire_ctx); - - /* now we hold the locks, so now that it is safe, stash the - * ctx for drm_modeset_unlock_all(): - */ - config->acquire_ctx = ctx; - - drm_warn_on_modeset_not_all_locked(dev); - - return; - -fail: - if (ret == -EDEADLK) { - drm_modeset_backoff(ctx); - goto retry; - } -} -EXPORT_SYMBOL(drm_modeset_lock_all); - -/** - * drm_modeset_unlock_all - drop all modeset locks - * @dev: device - * - * This function drop all modeset locks taken by drm_modeset_lock_all. 
- */ -void drm_modeset_unlock_all(struct drm_device *dev) -{ - struct drm_mode_config *config = &dev->mode_config; - struct drm_modeset_acquire_ctx *ctx = config->acquire_ctx; - - if (WARN_ON(!ctx)) - return; - - config->acquire_ctx = NULL; - drm_modeset_drop_locks(ctx); - drm_modeset_acquire_fini(ctx); - - kfree(ctx); - - mutex_unlock(&dev->mode_config.mutex); -} -EXPORT_SYMBOL(drm_modeset_unlock_all); - -/** - * drm_warn_on_modeset_not_all_locked - check that all modeset locks are locked - * @dev: device - * - * Useful as a debug assert. - */ -void drm_warn_on_modeset_not_all_locked(struct drm_device *dev) -{ - struct drm_crtc *crtc; - - /* Locking is currently fubar in the panic handler. */ - if (oops_in_progress) - return; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) - WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); - - WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); - WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); -} -EXPORT_SYMBOL(drm_warn_on_modeset_not_all_locked); - /* Avoid boilerplate. I'm tired of typing. */ #define DRM_ENUM_NAME_FN(fnname, list) \ const char *fnname(int val) \ diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 0dc57d5ecd10..73e6534fd0aa 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -56,6 +56,101 @@ */ +/** + * drm_modeset_lock_all - take all modeset locks + * @dev: drm device + * + * This function takes all modeset locks, suitable where a more fine-grained + * scheme isn't (yet) implemented. Locks must be dropped with + * drm_modeset_unlock_all. 
+ */ +void drm_modeset_lock_all(struct drm_device *dev) +{ + struct drm_mode_config *config = &dev->mode_config; + struct drm_modeset_acquire_ctx *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (WARN_ON(!ctx)) + return; + + mutex_lock(&config->mutex); + + drm_modeset_acquire_init(ctx, 0); + +retry: + ret = drm_modeset_lock(&config->connection_mutex, ctx); + if (ret) + goto fail; + ret = drm_modeset_lock_all_crtcs(dev, ctx); + if (ret) + goto fail; + + WARN_ON(config->acquire_ctx); + + /* now we hold the locks, so now that it is safe, stash the + * ctx for drm_modeset_unlock_all(): + */ + config->acquire_ctx = ctx; + + drm_warn_on_modeset_not_all_locked(dev); + + return; + +fail: + if (ret == -EDEADLK) { + drm_modeset_backoff(ctx); + goto retry; + } +} +EXPORT_SYMBOL(drm_modeset_lock_all); + +/** + * drm_modeset_unlock_all - drop all modeset locks + * @dev: device + * + * This function drop all modeset locks taken by drm_modeset_lock_all. + */ +void drm_modeset_unlock_all(struct drm_device *dev) +{ + struct drm_mode_config *config = &dev->mode_config; + struct drm_modeset_acquire_ctx *ctx = config->acquire_ctx; + + if (WARN_ON(!ctx)) + return; + + config->acquire_ctx = NULL; + drm_modeset_drop_locks(ctx); + drm_modeset_acquire_fini(ctx); + + kfree(ctx); + + mutex_unlock(&dev->mode_config.mutex); +} +EXPORT_SYMBOL(drm_modeset_unlock_all); + +/** + * drm_warn_on_modeset_not_all_locked - check that all modeset locks are locked + * @dev: device + * + * Useful as a debug assert. + */ +void drm_warn_on_modeset_not_all_locked(struct drm_device *dev) +{ + struct drm_crtc *crtc; + + /* Locking is currently fubar in the panic handler. 
*/ + if (oops_in_progress) + return; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) + WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); + + WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); + WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); +} +EXPORT_SYMBOL(drm_warn_on_modeset_not_all_locked); + /** * drm_modeset_acquire_init - initialize acquire context * @ctx: the acquire context diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 9f18e7022ab3..a11d73422e7f 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -218,10 +218,6 @@ struct drm_property { struct list_head enum_blob_list; }; -void drm_modeset_lock_all(struct drm_device *dev); -void drm_modeset_unlock_all(struct drm_device *dev); -void drm_warn_on_modeset_not_all_locked(struct drm_device *dev); - struct drm_crtc; struct drm_connector; struct drm_encoder; diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h index 402aa7a6a058..cf61e857bc06 100644 --- a/include/drm/drm_modeset_lock.h +++ b/include/drm/drm_modeset_lock.h @@ -120,6 +120,11 @@ int drm_modeset_lock_interruptible(struct drm_modeset_lock *lock, void drm_modeset_unlock(struct drm_modeset_lock *lock); struct drm_device; + +void drm_modeset_lock_all(struct drm_device *dev); +void drm_modeset_unlock_all(struct drm_device *dev); +void drm_warn_on_modeset_not_all_locked(struct drm_device *dev); + int drm_modeset_lock_all_crtcs(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx); -- GitLab From d059f652e73c35678d28d4cd09ab2cec89696af9 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 25 Jul 2014 18:07:40 +0200 Subject: [PATCH 0083/1868] drm: Handle legacy per-crtc locking with full acquire ctx So drivers using the atomic interfaces expect that they can acquire additional locks internal to the driver as-needed. Examples would be locks to protect shared state like shared display PLLs. 
Unfortunately the legacy ioctls assume that all locking is fully done by the drm core. Now for those paths which grab all locks we already have to keep around an acquire context in dev->mode_config. Helper functions that implement legacy interfaces in terms of atomic support can therefore grab this acquire context and reuse it. The only interfaces left are the cursor and pageflip ioctls. So add functions to grab the crtc lock these need using an acquire context and preserve it for atomic drivers to reuse. v2: - Fixup comments&kerneldoc. - Drop the WARNING from modeset_lock_all_crtcs since that can be used in legacy paths with crtc locking. v3: Fix a typo in the kerneldoc Dave spotted. Cc: Dave Airlie Reviewed-by: Dave Airlie Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 8 +-- drivers/gpu/drm/drm_modeset_lock.c | 84 ++++++++++++++++++++++++++++++ include/drm/drm_crtc.h | 6 +++ include/drm/drm_modeset_lock.h | 5 ++ 4 files changed, 99 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index caaa01f3b353..ab121b6d980c 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -2801,7 +2801,7 @@ static int drm_mode_cursor_common(struct drm_device *dev, if (crtc->cursor) return drm_mode_cursor_universal(crtc, req, file_priv); - drm_modeset_lock(&crtc->mutex, NULL); + drm_modeset_lock_crtc(crtc); if (req->flags & DRM_MODE_CURSOR_BO) { if (!crtc->funcs->cursor_set && !crtc->funcs->cursor_set2) { ret = -ENXIO; @@ -2825,7 +2825,7 @@ static int drm_mode_cursor_common(struct drm_device *dev, } } out: - drm_modeset_unlock(&crtc->mutex); + drm_modeset_unlock_crtc(crtc); return ret; @@ -4561,7 +4561,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, if (!crtc) return -ENOENT; - drm_modeset_lock(&crtc->mutex, NULL); + drm_modeset_lock_crtc(crtc); if (crtc->primary->fb == NULL) { /* The framebuffer is currently unbound, presumably * due to a hotplug event, that
userspace has not @@ -4645,7 +4645,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, drm_framebuffer_unreference(fb); if (old_fb) drm_framebuffer_unreference(old_fb); - drm_modeset_unlock(&crtc->mutex); + drm_modeset_unlock_crtc(crtc); return ret; } diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 73e6534fd0aa..4753c8bd5ab5 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -129,6 +129,90 @@ void drm_modeset_unlock_all(struct drm_device *dev) } EXPORT_SYMBOL(drm_modeset_unlock_all); +/** + * drm_modeset_lock_crtc - lock crtc with hidden acquire ctx + * @crtc: drm crtc + * + * This function locks the given crtc using a hidden acquire context. This is + * necessary so that drivers internally using the atomic interfaces can grab + * further locks with the lock acquire context. + */ +void drm_modeset_lock_crtc(struct drm_crtc *crtc) +{ + struct drm_modeset_acquire_ctx *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (WARN_ON(!ctx)) + return; + + drm_modeset_acquire_init(ctx, 0); + +retry: + ret = drm_modeset_lock(&crtc->mutex, ctx); + if (ret) + goto fail; + + WARN_ON(crtc->acquire_ctx); + + /* now we hold the locks, so now that it is safe, stash the + * ctx for drm_modeset_unlock_crtc(): + */ + crtc->acquire_ctx = ctx; + + return; + +fail: + if (ret == -EDEADLK) { + drm_modeset_backoff(ctx); + goto retry; + } +} +EXPORT_SYMBOL(drm_modeset_lock_crtc); + +/** + * drm_modeset_legacy_acquire_ctx - find acquire ctx for legacy ioctls + * crtc: drm crtc + * + * Legacy ioctl operations like cursor updates or page flips only have per-crtc + * locking, and store the acquire ctx in the corresponding crtc. All other + * legacy operations take all locks and use a global acquire context. This + * function grabs the right one. 
+ */ +struct drm_modeset_acquire_ctx * +drm_modeset_legacy_acquire_ctx(struct drm_crtc *crtc) +{ + if (crtc->acquire_ctx) + return crtc->acquire_ctx; + + WARN_ON(!crtc->dev->mode_config.acquire_ctx); + + return crtc->dev->mode_config.acquire_ctx; +} +EXPORT_SYMBOL(drm_modeset_legacy_acquire_ctx); + +/** + * drm_modeset_unlock_crtc - drop crtc lock + * @crtc: drm crtc + * + * This drops the crtc lock acquire with drm_modeset_lock_crtc() and all other + * locks acquired through the hidden context. + */ +void drm_modeset_unlock_crtc(struct drm_crtc *crtc) +{ + struct drm_modeset_acquire_ctx *ctx = crtc->acquire_ctx; + + if (WARN_ON(!ctx)) + return; + + crtc->acquire_ctx = NULL; + drm_modeset_drop_locks(ctx); + drm_modeset_acquire_fini(ctx); + + kfree(ctx); +} +EXPORT_SYMBOL(drm_modeset_unlock_crtc); + /** * drm_warn_on_modeset_not_all_locked - check that all modeset locks are locked * @dev: device diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index a11d73422e7f..508817bae538 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -371,6 +371,12 @@ struct drm_crtc { void *helper_private; struct drm_object_properties properties; + + /* + * For legacy crtc ioctls so that atomic drivers can get at the locking + * acquire context. 
+ */ + struct drm_modeset_acquire_ctx *acquire_ctx; }; diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h index cf61e857bc06..d38e1508f11a 100644 --- a/include/drm/drm_modeset_lock.h +++ b/include/drm/drm_modeset_lock.h @@ -120,10 +120,15 @@ int drm_modeset_lock_interruptible(struct drm_modeset_lock *lock, void drm_modeset_unlock(struct drm_modeset_lock *lock); struct drm_device; +struct drm_crtc; void drm_modeset_lock_all(struct drm_device *dev); void drm_modeset_unlock_all(struct drm_device *dev); +void drm_modeset_lock_crtc(struct drm_crtc *crtc); +void drm_modeset_unlock_crtc(struct drm_crtc *crtc); void drm_warn_on_modeset_not_all_locked(struct drm_device *dev); +struct drm_modeset_acquire_ctx * +drm_modeset_legacy_acquire_ctx(struct drm_crtc *crtc); int drm_modeset_lock_all_crtcs(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx); -- GitLab From 3d30a59bfcb7c96d4aacdb053c2ccc49394b2311 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 27 Jul 2014 13:42:42 +0200 Subject: [PATCH 0084/1868] drm: Move ->old_fb from crtc to plane Atomic implementations for legacy ioctls must be able to drop locks. Which doesn't cause havoc since we only do that while constructing the new state, so no driver or hardware state change has happened. The only troubling bit is the fb refcounting the core does - if someone else has snuck in then it might potentially unref an outdated framebuffer. To fix that move the old_fb temporary storage into struct drm_plane for all ioctls, so that the atomic helpers can update it. v2: Fix up the error case handling as suggested by Matt Roper and just grab locks unconditionally - there's no point in optimizing the locking for when userspace gets it wrong.
Cc: Matt Roper Cc: Dave Airlie Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 46 ++++++++++++++++++++------------------ include/drm/drm_crtc.h | 8 +++---- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index ab121b6d980c..cacb460a7145 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1287,19 +1287,21 @@ EXPORT_SYMBOL(drm_plane_index); */ void drm_plane_force_disable(struct drm_plane *plane) { - struct drm_framebuffer *old_fb = plane->fb; int ret; - if (!old_fb) + if (!plane->fb) return; + plane->old_fb = plane->fb; ret = plane->funcs->disable_plane(plane); if (ret) { DRM_ERROR("failed to disable plane with busy fb\n"); + plane->old_fb = NULL; return; } /* disconnect the plane from the fb and crtc: */ - __drm_framebuffer_unreference(old_fb); + __drm_framebuffer_unreference(plane->old_fb); + plane->old_fb = NULL; plane->fb = NULL; plane->crtc = NULL; } @@ -2275,23 +2277,21 @@ static int setplane_internal(struct drm_plane *plane, uint32_t src_w, uint32_t src_h) { struct drm_device *dev = plane->dev; - struct drm_framebuffer *old_fb = NULL; int ret = 0; unsigned int fb_width, fb_height; int i; + drm_modeset_lock_all(dev); /* No fb means shut it down */ if (!fb) { - drm_modeset_lock_all(dev); - old_fb = plane->fb; + plane->old_fb = plane->fb; ret = plane->funcs->disable_plane(plane); if (!ret) { plane->crtc = NULL; plane->fb = NULL; } else { - old_fb = NULL; + plane->old_fb = NULL; } - drm_modeset_unlock_all(dev); goto out; } @@ -2331,8 +2331,7 @@ static int setplane_internal(struct drm_plane *plane, goto out; } - drm_modeset_lock_all(dev); - old_fb = plane->fb; + plane->old_fb = plane->fb; ret = plane->funcs->update_plane(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h); @@ -2341,15 +2340,16 @@ static int setplane_internal(struct drm_plane *plane, plane->fb = fb; fb = NULL; } else { - old_fb = NULL; + 
plane->old_fb = NULL; } - drm_modeset_unlock_all(dev); out: if (fb) drm_framebuffer_unreference(fb); - if (old_fb) - drm_framebuffer_unreference(old_fb); + if (plane->old_fb) + drm_framebuffer_unreference(plane->old_fb); + plane->old_fb = NULL; + drm_modeset_unlock_all(dev); return ret; @@ -2456,7 +2456,7 @@ int drm_mode_set_config_internal(struct drm_mode_set *set) * crtcs. Atomic modeset will have saner semantics ... */ list_for_each_entry(tmp, &crtc->dev->mode_config.crtc_list, head) - tmp->old_fb = tmp->primary->fb; + tmp->primary->old_fb = tmp->primary->fb; fb = set->fb; @@ -2469,8 +2469,9 @@ int drm_mode_set_config_internal(struct drm_mode_set *set) list_for_each_entry(tmp, &crtc->dev->mode_config.crtc_list, head) { if (tmp->primary->fb) drm_framebuffer_reference(tmp->primary->fb); - if (tmp->old_fb) - drm_framebuffer_unreference(tmp->old_fb); + if (tmp->primary->old_fb) + drm_framebuffer_unreference(tmp->primary->old_fb); + tmp->primary->old_fb = NULL; } return ret; @@ -4545,7 +4546,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, { struct drm_mode_crtc_page_flip *page_flip = data; struct drm_crtc *crtc; - struct drm_framebuffer *fb = NULL, *old_fb = NULL; + struct drm_framebuffer *fb = NULL; struct drm_pending_vblank_event *e = NULL; unsigned long flags; int ret = -EINVAL; @@ -4617,7 +4618,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, (void (*) (struct drm_pending_event *)) kfree; } - old_fb = crtc->primary->fb; + crtc->primary->old_fb = crtc->primary->fb; ret = crtc->funcs->page_flip(crtc, fb, e, page_flip->flags); if (ret) { if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) { @@ -4627,7 +4628,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, kfree(e); } /* Keep the old fb, don't unref it. 
*/ - old_fb = NULL; + crtc->primary->old_fb = NULL; } else { /* * Warn if the driver hasn't properly updated the crtc->fb @@ -4643,8 +4644,9 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, out: if (fb) drm_framebuffer_unreference(fb); - if (old_fb) - drm_framebuffer_unreference(old_fb); + if (crtc->primary->old_fb) + drm_framebuffer_unreference(crtc->primary->old_fb); + crtc->primary->old_fb = NULL; drm_modeset_unlock_crtc(crtc); return ret; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 508817bae538..279565aa0c33 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -341,10 +341,6 @@ struct drm_crtc { int cursor_x; int cursor_y; - /* Temporary tracking of the old fb while a modeset is ongoing. Used - * by drm_mode_set_config_internal to implement correct refcounting. */ - struct drm_framebuffer *old_fb; - bool enabled; /* Requested mode from modesetting. */ @@ -623,6 +619,10 @@ struct drm_plane { struct drm_crtc *crtc; struct drm_framebuffer *fb; + /* Temporary tracking of the old fb while a modeset is ongoing. Used + * by drm_mode_set_config_internal to implement correct refcounting. */ + struct drm_framebuffer *old_fb; + const struct drm_plane_funcs *funcs; struct drm_object_properties properties; -- GitLab From cb597bb3a2fbfc871cc1c703fb330d247bd21394 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 27 Jul 2014 19:09:33 +0200 Subject: [PATCH 0085/1868] drm: trylock modeset locking for fbdev panics In the fbdev code we want to do trylocks only to avoid deadlocks and other ugly issues. Thus far we've only grabbed the overall modeset lock, but that already failed to exclude a pile of potential concurrent operations. With proper atomic support this will be worse. So add a trylock mode to the modeset locking code which attempts all locks only with trylocks, if possible.
We need to track this in the locking functions themselves and can't restrict this to drivers since driver-private w/w mutexes must be treated the same way. There's still the issue that other driver private locks aren't handled here at all, but well can't have everything. With this we will at least not regress, even once atomic allows lots of concurrent kms activity. Aside: We should move the acquire context to stack-based allocation in the callers to get rid of that awful WARN_ON(kmalloc_failed) control flow which just blows up when memory is short. But that's material for separate patches. v2: - Fix logic inversion fumble in the fb helper. - Add proper kerneldoc. Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_fb_helper.c | 10 +++--- drivers/gpu/drm/drm_modeset_lock.c | 56 +++++++++++++++++++++++------- include/drm/drm_modeset_lock.h | 6 ++++ 3 files changed, 55 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 3a6b6635e3f5..7b7b9565188f 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -365,11 +365,11 @@ static bool drm_fb_helper_force_kernel_mode(void) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) continue; - /* NOTE: we use lockless flag below to avoid grabbing other - * modeset locks. So just trylock the underlying mutex - * directly: + /* + * NOTE: Use trylock mode to avoid deadlocks and sleeping in + * panic context. 
*/ - if (!mutex_trylock(&dev->mode_config.mutex)) { + if (__drm_modeset_lock_all(dev, true) != 0) { error = true; continue; } @@ -378,7 +378,7 @@ static bool drm_fb_helper_force_kernel_mode(void) if (ret) error = true; - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); } return error; } diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 4753c8bd5ab5..5280b64a0230 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -57,26 +57,37 @@ /** - * drm_modeset_lock_all - take all modeset locks - * @dev: drm device + * __drm_modeset_lock_all - internal helper to grab all modeset locks + * @dev: DRM device + * @trylock: trylock mode for atomic contexts * - * This function takes all modeset locks, suitable where a more fine-grained - * scheme isn't (yet) implemented. Locks must be dropped with - * drm_modeset_unlock_all. + * This is a special version of drm_modeset_lock_all() which can also be used in + * atomic contexts. Then @trylock must be set to true. + * + * Returns: + * 0 on success or negative error code on failure. */ -void drm_modeset_lock_all(struct drm_device *dev) +int __drm_modeset_lock_all(struct drm_device *dev, + bool trylock) { struct drm_mode_config *config = &dev->mode_config; struct drm_modeset_acquire_ctx *ctx; int ret; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (WARN_ON(!ctx)) - return; + ctx = kzalloc(sizeof(*ctx), + trylock ? 
GFP_ATOMIC : GFP_KERNEL); + if (!ctx) + return -ENOMEM; - mutex_lock(&config->mutex); + if (trylock) { + if (!mutex_trylock(&config->mutex)) + return -EBUSY; + } else { + mutex_lock(&config->mutex); + } drm_modeset_acquire_init(ctx, 0); + ctx->trylock_only = trylock; retry: ret = drm_modeset_lock(&config->connection_mutex, ctx); @@ -95,13 +106,29 @@ void drm_modeset_lock_all(struct drm_device *dev) drm_warn_on_modeset_not_all_locked(dev); - return; + return 0; fail: if (ret == -EDEADLK) { drm_modeset_backoff(ctx); goto retry; } + + return ret; +} +EXPORT_SYMBOL(__drm_modeset_lock_all); + +/** + * drm_modeset_lock_all - take all modeset locks + * @dev: drm device + * + * This function takes all modeset locks, suitable where a more fine-grained + * scheme isn't (yet) implemented. Locks must be dropped with + * drm_modeset_unlock_all. + */ +void drm_modeset_lock_all(struct drm_device *dev) +{ + WARN_ON(__drm_modeset_lock_all(dev, false) != 0); } EXPORT_SYMBOL(drm_modeset_lock_all); @@ -287,7 +314,12 @@ static inline int modeset_lock(struct drm_modeset_lock *lock, WARN_ON(ctx->contended); - if (interruptible && slow) { + if (ctx->trylock_only) { + if (!ww_mutex_trylock(&lock->mutex)) + return -EBUSY; + else + return 0; + } else if (interruptible && slow) { ret = ww_mutex_lock_slow_interruptible(&lock->mutex, &ctx->ww_ctx); } else if (interruptible) { ret = ww_mutex_lock_interruptible(&lock->mutex, &ctx->ww_ctx); diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h index d38e1508f11a..a3f736d24382 100644 --- a/include/drm/drm_modeset_lock.h +++ b/include/drm/drm_modeset_lock.h @@ -53,6 +53,11 @@ struct drm_modeset_acquire_ctx { * list of held locks (drm_modeset_lock) */ struct list_head locked; + + /** + * Trylock mode, use only for panic handlers! 
+ */ + bool trylock_only; }; /** @@ -123,6 +128,7 @@ struct drm_device; struct drm_crtc; void drm_modeset_lock_all(struct drm_device *dev); +int __drm_modeset_lock_all(struct drm_device *dev, bool trylock); void drm_modeset_unlock_all(struct drm_device *dev); void drm_modeset_lock_crtc(struct drm_crtc *crtc); void drm_modeset_unlock_crtc(struct drm_crtc *crtc); -- GitLab From 2c0827cffca8ac0c654b888c58a1989a5172f007 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 8 Aug 2014 20:44:59 +0200 Subject: [PATCH 0086/1868] drm/i915: Update DRIVER_DATE to 20140808 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 125a83c70768..fab97bc3215f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -53,7 +53,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20140725" +#define DRIVER_DATE "20140808" enum pipe { INVALID_PIPE = -1, -- GitLab From 4fa790421c10e5c9c62406655c06d97a94555d54 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 8 Aug 2014 19:25:57 +0100 Subject: [PATCH 0087/1868] drm/i915: Fix erroneous conversion to u8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit adj was defined as u8. The issue is last_adj can be negative and adj is initialized with: adj = dev_priv->rps.last_adj; and we were also happily doing things like: if (adj < 0) (thank static analysers!) v2: Make new_delay an int in case we overflow the u8 in the intermediate computations. new_delay will get clamped at the end anyway. 
(Ville) Cc: Deepak S Cc: Ville Syrjälä Signed-off-by: Damien Lespiau Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 87abe8679495..f0d24db76e72 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1327,10 +1327,10 @@ static u32 vlv_c0_residency(struct drm_i915_private *dev_priv, * @dev_priv: DRM device private * */ -static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv) +static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv) { u32 residency_C0_up = 0, residency_C0_down = 0; - u8 new_delay, adj; + int new_delay, adj; dev_priv->rps.ei_interrupt_count++; -- GitLab From f45651bae2ee73ae551699d481f76aa6ad92138f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 8 Aug 2014 21:51:10 +0300 Subject: [PATCH 0088/1868] drm/i915: Eliminate rmw from .update_primary_plane() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the entire DSPCNTR register setup into the .update_primary_plane() functions. That's where it belongs anyway and it'll also help 830M which has the extra problem that plane registers reads will return the value latched at the last vblank, not the value that was last written. Also move DSPPOS and DSPSIZE setup there. 
v2: Don't move variable initialization to avoid churn later Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 100 +++++++++------------------ 1 file changed, 32 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 24295694e493..041fd76a2ded 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2388,12 +2388,26 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc, int plane = intel_crtc->plane; unsigned long linear_offset; u32 dspcntr; - u32 reg; + u32 reg = DSPCNTR(plane); + + dspcntr = DISPPLANE_GAMMA_ENABLE; + + if (intel_crtc->primary_enabled) + dspcntr |= DISPLAY_PLANE_ENABLE; + + if (INTEL_INFO(dev)->gen < 4) { + if (intel_crtc->pipe == PIPE_B) + dspcntr |= DISPPLANE_SEL_PIPE_B; + + /* pipesrc and dspsize control the size that is scaled from, + * which should always be the user's requested size. 
+ */ + I915_WRITE(DSPSIZE(plane), + ((intel_crtc->config.pipe_src_h - 1) << 16) | + (intel_crtc->config.pipe_src_w - 1)); + I915_WRITE(DSPPOS(plane), 0); + } - reg = DSPCNTR(plane); - dspcntr = I915_READ(reg); - /* Mask out pixel format bits in case we change it */ - dspcntr &= ~DISPPLANE_PIXFORMAT_MASK; switch (fb->pixel_format) { case DRM_FORMAT_C8: dspcntr |= DISPPLANE_8BPP; @@ -2425,12 +2439,9 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc, BUG(); } - if (INTEL_INFO(dev)->gen >= 4) { - if (obj->tiling_mode != I915_TILING_NONE) - dspcntr |= DISPPLANE_TILED; - else - dspcntr &= ~DISPPLANE_TILED; - } + if (INTEL_INFO(dev)->gen >= 4 && + obj->tiling_mode != I915_TILING_NONE) + dspcntr |= DISPPLANE_TILED; if (IS_G4X(dev)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; @@ -2474,12 +2485,16 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc, int plane = intel_crtc->plane; unsigned long linear_offset; u32 dspcntr; - u32 reg; + u32 reg = DSPCNTR(plane); + + dspcntr = DISPPLANE_GAMMA_ENABLE; + + if (intel_crtc->primary_enabled) + dspcntr |= DISPLAY_PLANE_ENABLE; + + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; - reg = DSPCNTR(plane); - dspcntr = I915_READ(reg); - /* Mask out pixel format bits in case we change it */ - dspcntr &= ~DISPPLANE_PIXFORMAT_MASK; switch (fb->pixel_format) { case DRM_FORMAT_C8: dspcntr |= DISPPLANE_8BPP; @@ -2509,12 +2524,8 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc, if (obj->tiling_mode != I915_TILING_NONE) dspcntr |= DISPPLANE_TILED; - else - dspcntr &= ~DISPPLANE_TILED; - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - dspcntr &= ~DISPPLANE_TRICKLE_FEED_DISABLE; - else + if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; I915_WRITE(reg, dspcntr); @@ -3936,7 +3947,6 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = 
intel_crtc->pipe; - enum plane plane = intel_crtc->plane; WARN_ON(!crtc->enabled); @@ -3958,10 +3968,6 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) ironlake_set_pipeconf(crtc); - /* Set up the display plane register */ - I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE); - POSTING_READ(DSPCNTR(plane)); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, crtc->x, crtc->y); @@ -4049,7 +4055,6 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - enum plane plane = intel_crtc->plane; WARN_ON(!crtc->enabled); @@ -4073,10 +4078,6 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) intel_set_pipe_csc(crtc); - /* Set up the display plane register */ - I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE | DISPPLANE_PIPE_CSC_ENABLE); - POSTING_READ(DSPCNTR(plane)); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, crtc->x, crtc->y); @@ -4632,9 +4633,7 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - int plane = intel_crtc->plane; bool is_dsi; - u32 dspcntr; WARN_ON(!crtc->enabled); @@ -4650,27 +4649,13 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) vlv_prepare_pll(intel_crtc); } - /* Set up the display plane register */ - dspcntr = DISPPLANE_GAMMA_ENABLE; - if (intel_crtc->config.has_dp_encoder) intel_dp_set_m_n(intel_crtc); intel_set_pipe_timings(intel_crtc); - /* pipesrc and dspsize control the size that is scaled from, - * which should always be the user's requested size. 
- */ - I915_WRITE(DSPSIZE(plane), - ((intel_crtc->config.pipe_src_h - 1) << 16) | - (intel_crtc->config.pipe_src_w - 1)); - I915_WRITE(DSPPOS(plane), 0); - i9xx_set_pipeconf(intel_crtc); - I915_WRITE(DSPCNTR(plane), dspcntr); - POSTING_READ(DSPCNTR(plane)); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, crtc->x, crtc->y); @@ -4725,8 +4710,6 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - int plane = intel_crtc->plane; - u32 dspcntr; WARN_ON(!crtc->enabled); @@ -4735,32 +4718,13 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) i9xx_set_pll_dividers(intel_crtc); - /* Set up the display plane register */ - dspcntr = DISPPLANE_GAMMA_ENABLE; - - if (pipe == 0) - dspcntr &= ~DISPPLANE_SEL_PIPE_MASK; - else - dspcntr |= DISPPLANE_SEL_PIPE_B; - if (intel_crtc->config.has_dp_encoder) intel_dp_set_m_n(intel_crtc); intel_set_pipe_timings(intel_crtc); - /* pipesrc and dspsize control the size that is scaled from, - * which should always be the user's requested size. - */ - I915_WRITE(DSPSIZE(plane), - ((intel_crtc->config.pipe_src_h - 1) << 16) | - (intel_crtc->config.pipe_src_w - 1)); - I915_WRITE(DSPPOS(plane), 0); - i9xx_set_pipeconf(intel_crtc); - I915_WRITE(DSPCNTR(plane), dspcntr); - POSTING_READ(DSPCNTR(plane)); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, crtc->x, crtc->y); -- GitLab From fdd508a6419217cce28213f3c9bd27c02a0d4c71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 8 Aug 2014 21:51:11 +0300 Subject: [PATCH 0089/1868] drm/i915: Call .update_primary_plane in intel_{enable, disable}_primary_hw_plane() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the intel_{enable,disable}_primary_hw_plane() simply call .update_primary_plane(), thus eliminating the rmw from these functions which should help the poor old 830M. 
Now we can also remove the .update_primary_plane() from the .crtc_enable() hooks because we end up calling it via intel_crtc_enable_planes()->intel_enable_primary_hw_plane(). This also has the nice benefit of making primary planes a bit closer to the way we handle sprite planes during modesets. v2: Just write 0 to DSPCNTR and DSPSURF/DSPADDR if the plane is (to be) disabled. Quicker, and more importantly avoids an oops when fb==NULL due to BIOS fb takeover failure. Pimp the commit message a bit (Matt) v3: Drop useless primary_enabled checks when setting DISPLAY_PLANE_ENABLE Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 119 +++++++++++---------------- 1 file changed, 49 insertions(+), 70 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 041fd76a2ded..f306c91b74a5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2082,35 +2082,28 @@ void intel_flush_primary_plane(struct drm_i915_private *dev_priv, /** * intel_enable_primary_hw_plane - enable the primary plane on a given pipe - * @dev_priv: i915 private structure - * @plane: plane to enable - * @pipe: pipe being fed + * @plane: plane to be enabled + * @crtc: crtc for the plane * - * Enable @plane on @pipe, making sure that @pipe is running first. + * Enable @plane on @crtc, making sure that the pipe is running first. 
*/ -static void intel_enable_primary_hw_plane(struct drm_i915_private *dev_priv, - enum plane plane, enum pipe pipe) +static void intel_enable_primary_hw_plane(struct drm_plane *plane, + struct drm_crtc *crtc) { - struct drm_device *dev = dev_priv->dev; - struct intel_crtc *intel_crtc = - to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); - int reg; - u32 val; + struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); /* If the pipe isn't enabled, we can't pump pixels and may hang */ - assert_pipe_enabled(dev_priv, pipe); + assert_pipe_enabled(dev_priv, intel_crtc->pipe); if (intel_crtc->primary_enabled) return; intel_crtc->primary_enabled = true; - reg = DSPCNTR(plane); - val = I915_READ(reg); - WARN_ON(val & DISPLAY_PLANE_ENABLE); - - I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE); - intel_flush_primary_plane(dev_priv, plane); + dev_priv->display.update_primary_plane(crtc, plane->fb, + crtc->x, crtc->y); /* * BDW signals flip done immediately if the plane @@ -2123,31 +2116,27 @@ static void intel_enable_primary_hw_plane(struct drm_i915_private *dev_priv, /** * intel_disable_primary_hw_plane - disable the primary hardware plane - * @dev_priv: i915 private structure - * @plane: plane to disable - * @pipe: pipe consuming the data + * @plane: plane to be disabled + * @crtc: crtc for the plane * - * Disable @plane; should be an independent operation. + * Disable @plane on @crtc, making sure that the pipe is running first. 
*/ -static void intel_disable_primary_hw_plane(struct drm_i915_private *dev_priv, - enum plane plane, enum pipe pipe) +static void intel_disable_primary_hw_plane(struct drm_plane *plane, + struct drm_crtc *crtc) { - struct intel_crtc *intel_crtc = - to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); - int reg; - u32 val; + struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + + assert_pipe_enabled(dev_priv, intel_crtc->pipe); if (!intel_crtc->primary_enabled) return; intel_crtc->primary_enabled = false; - reg = DSPCNTR(plane); - val = I915_READ(reg); - WARN_ON((val & DISPLAY_PLANE_ENABLE) == 0); - - I915_WRITE(reg, val & ~DISPLAY_PLANE_ENABLE); - intel_flush_primary_plane(dev_priv, plane); + dev_priv->display.update_primary_plane(crtc, plane->fb, + crtc->x, crtc->y); } static bool need_vtd_wa(struct drm_device *dev) @@ -2390,10 +2379,19 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc, u32 dspcntr; u32 reg = DSPCNTR(plane); + if (!intel_crtc->primary_enabled) { + I915_WRITE(reg, 0); + if (INTEL_INFO(dev)->gen >= 4) + I915_WRITE(DSPSURF(plane), 0); + else + I915_WRITE(DSPADDR(plane), 0); + POSTING_READ(reg); + return; + } + dspcntr = DISPPLANE_GAMMA_ENABLE; - if (intel_crtc->primary_enabled) - dspcntr |= DISPLAY_PLANE_ENABLE; + dspcntr |= DISPLAY_PLANE_ENABLE; if (INTEL_INFO(dev)->gen < 4) { if (intel_crtc->pipe == PIPE_B) @@ -2487,10 +2485,16 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc, u32 dspcntr; u32 reg = DSPCNTR(plane); + if (!intel_crtc->primary_enabled) { + I915_WRITE(reg, 0); + I915_WRITE(DSPSURF(plane), 0); + POSTING_READ(reg); + return; + } + dspcntr = DISPPLANE_GAMMA_ENABLE; - if (intel_crtc->primary_enabled) - dspcntr |= DISPLAY_PLANE_ENABLE; + dspcntr |= DISPLAY_PLANE_ENABLE; if (IS_HASWELL(dev) || IS_BROADWELL(dev)) dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; @@ -3884,14 +3888,12 @@ static void intel_crtc_dpms_overlay(struct 
intel_crtc *intel_crtc, bool enable) static void intel_crtc_enable_planes(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; - int plane = intel_crtc->plane; drm_vblank_on(dev, pipe); - intel_enable_primary_hw_plane(dev_priv, plane, pipe); + intel_enable_primary_hw_plane(crtc->primary, crtc); intel_enable_planes(crtc); intel_crtc_update_cursor(crtc, true); intel_crtc_dpms_overlay(intel_crtc, true); @@ -3928,7 +3930,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc) intel_crtc_dpms_overlay(intel_crtc, false); intel_crtc_update_cursor(crtc, false); intel_disable_planes(crtc); - intel_disable_primary_hw_plane(dev_priv, plane, pipe); + intel_disable_primary_hw_plane(crtc->primary, crtc); /* * FIXME: Once we grow proper nuclear flip support out of this we need @@ -3968,9 +3970,6 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) ironlake_set_pipeconf(crtc); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, - crtc->x, crtc->y); - intel_crtc->active = true; intel_set_cpu_fifo_underrun_reporting(dev, pipe, true); @@ -4078,9 +4077,6 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) intel_set_pipe_csc(crtc); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, - crtc->x, crtc->y); - intel_crtc->active = true; intel_set_cpu_fifo_underrun_reporting(dev, pipe, true); @@ -4629,7 +4625,6 @@ static void valleyview_modeset_global_resources(struct drm_device *dev) static void valleyview_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; @@ -4656,9 +4651,6 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) i9xx_set_pipeconf(intel_crtc); - dev_priv->display.update_primary_plane(crtc, 
crtc->primary->fb, - crtc->x, crtc->y); - intel_crtc->active = true; intel_set_cpu_fifo_underrun_reporting(dev, pipe, true); @@ -4706,7 +4698,6 @@ static void i9xx_set_pll_dividers(struct intel_crtc *crtc) static void i9xx_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; @@ -4725,9 +4716,6 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) i9xx_set_pipeconf(intel_crtc); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, - crtc->x, crtc->y); - intel_crtc->active = true; if (!IS_GEN2(dev)) @@ -11351,7 +11339,6 @@ static int intel_crtc_set_config(struct drm_mode_set *set) ret = intel_set_mode(set->crtc, set->mode, set->x, set->y, set->fb); } else if (config->fb_changed) { - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(set->crtc); intel_crtc_wait_for_pending_flips(set->crtc); @@ -11365,8 +11352,7 @@ static int intel_crtc_set_config(struct drm_mode_set *set) */ if (!intel_crtc->primary_enabled && ret == 0) { WARN_ON(!intel_crtc->active); - intel_enable_primary_hw_plane(dev_priv, intel_crtc->plane, - intel_crtc->pipe); + intel_enable_primary_hw_plane(set->crtc->primary, set->crtc); } /* @@ -11519,8 +11505,6 @@ static int intel_primary_plane_disable(struct drm_plane *plane) { struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_plane *intel_plane = to_intel_plane(plane); struct intel_crtc *intel_crtc; if (!plane->fb) @@ -11543,8 +11527,8 @@ intel_primary_plane_disable(struct drm_plane *plane) goto disable_unpin; intel_crtc_wait_for_pending_flips(plane->crtc); - intel_disable_primary_hw_plane(dev_priv, intel_plane->plane, - intel_plane->pipe); + intel_disable_primary_hw_plane(plane, plane->crtc); + disable_unpin: mutex_lock(&dev->struct_mutex); 
i915_gem_track_fb(intel_fb_obj(plane->fb), NULL, @@ -11564,9 +11548,7 @@ intel_primary_plane_setplane(struct drm_plane *plane, struct drm_crtc *crtc, uint32_t src_w, uint32_t src_h) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->fb); struct drm_rect dest = { @@ -11653,9 +11635,7 @@ intel_primary_plane_setplane(struct drm_plane *plane, struct drm_crtc *crtc, INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe)); if (intel_crtc->primary_enabled) - intel_disable_primary_hw_plane(dev_priv, - intel_plane->plane, - intel_plane->pipe); + intel_disable_primary_hw_plane(plane, crtc); if (plane->fb != fb) @@ -11672,8 +11652,7 @@ intel_primary_plane_setplane(struct drm_plane *plane, struct drm_crtc *crtc, return ret; if (!intel_crtc->primary_enabled) - intel_enable_primary_hw_plane(dev_priv, intel_crtc->plane, - intel_crtc->pipe); + intel_enable_primary_hw_plane(plane, crtc); return 0; } -- GitLab From 22c59960d9fe72f3fbd28de69cc43c5522dd5fe6 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 8 Aug 2014 17:45:32 -0300 Subject: [PATCH 0090/1868] drm/i915: fix i915_interrupt_info on BDW Currently, if the machine is runtime suspended and you read the file, you will get an "Unclaimed register" error message.
Testcase: igt/pm_rpm/debugfs-read Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 330caa1ab9f9..3b7decbeeed3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -703,6 +703,12 @@ static int i915_interrupt_info(struct seq_file *m, void *data) } for_each_pipe(pipe) { + if (!intel_display_power_enabled(dev_priv, + POWER_DOMAIN_PIPE(pipe))) { + seq_printf(m, "Pipe %c power disabled\n", + pipe_name(pipe)); + continue; + } seq_printf(m, "Pipe %c IMR:\t%08x\n", pipe_name(pipe), I915_READ(GEN8_DE_PIPE_IMR(pipe))); -- GitLab From 3bb11b536c1037143765b4efc8056600438df7f6 Mon Sep 17 00:00:00 2001 From: Sonika Jindal Date: Mon, 11 Aug 2014 09:06:39 +0530 Subject: [PATCH 0091/1868] drm/i915: Continuation of future readiness series Removing the check for HAS_PCH_SPLIT, it looks redundant here. Anyways all the platforms are checked separately. 
v2: Reordering as per the gen (Ville) Signed-off-by: Sonika Jindal Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 42 +++++++++++++--------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f306c91b74a5..0746590ed4e3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12354,29 +12354,27 @@ static void intel_init_display(struct drm_device *dev) dev_priv->display.get_display_clock_speed = i830_get_display_clock_speed; - if (HAS_PCH_SPLIT(dev)) { - if (IS_GEN5(dev)) { - dev_priv->display.fdi_link_train = ironlake_fdi_link_train; - dev_priv->display.write_eld = ironlake_write_eld; - } else if (IS_GEN6(dev)) { - dev_priv->display.fdi_link_train = gen6_fdi_link_train; - dev_priv->display.write_eld = ironlake_write_eld; - dev_priv->display.modeset_global_resources = - snb_modeset_global_resources; - } else if (IS_IVYBRIDGE(dev)) { - /* FIXME: detect B0+ stepping and use auto training */ - dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train; - dev_priv->display.write_eld = ironlake_write_eld; - dev_priv->display.modeset_global_resources = - ivb_modeset_global_resources; - } else if (IS_HASWELL(dev) || IS_GEN8(dev)) { - dev_priv->display.fdi_link_train = hsw_fdi_link_train; - dev_priv->display.write_eld = haswell_write_eld; - dev_priv->display.modeset_global_resources = - haswell_modeset_global_resources; - } - } else if (IS_G4X(dev)) { + if (IS_G4X(dev)) { dev_priv->display.write_eld = g4x_write_eld; + } else if (IS_GEN5(dev)) { + dev_priv->display.fdi_link_train = ironlake_fdi_link_train; + dev_priv->display.write_eld = ironlake_write_eld; + } else if (IS_GEN6(dev)) { + dev_priv->display.fdi_link_train = gen6_fdi_link_train; + dev_priv->display.write_eld = ironlake_write_eld; + dev_priv->display.modeset_global_resources = + snb_modeset_global_resources; + } else if (IS_IVYBRIDGE(dev)) { + /* 
FIXME: detect B0+ stepping and use auto training */ + dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train; + dev_priv->display.write_eld = ironlake_write_eld; + dev_priv->display.modeset_global_resources = + ivb_modeset_global_resources; + } else if (IS_HASWELL(dev) || IS_GEN8(dev)) { + dev_priv->display.fdi_link_train = hsw_fdi_link_train; + dev_priv->display.write_eld = haswell_write_eld; + dev_priv->display.modeset_global_resources = + haswell_modeset_global_resources; } else if (IS_VALLEYVIEW(dev)) { dev_priv->display.modeset_global_resources = valleyview_modeset_global_resources; -- GitLab From d6699dd3a7f696a80a5f8e5bb6ecf6ff6dd7c998 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 9 Aug 2014 16:29:31 +0100 Subject: [PATCH 0092/1868] drm/i915: Fix wrong number of HDMI translation entries I keep telling myself that those tables aren't great because their size is the number of dwords we need to program and not the number of entries (number of dwords = number of entries * 2). And... I got it wrong when I refactored the code. Fortunately, it was only wrong when the VBT table (or the code parsing it) is itself erroneous. Long story short, it shouldn't matter, but still, there's a potential array overflow and random programming of the DDI translation tables. 
Cc: Paulo Zanoni Signed-off-by: Damien Lespiau Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ca1f9a8a7d03..02d55843c78d 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -169,14 +169,14 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) ddi_translations_dp = bdw_ddi_translations_dp; ddi_translations_edp = bdw_ddi_translations_edp; ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi) / 2; hdmi_800mV_0dB = 7; } else if (IS_HASWELL(dev)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; ddi_translations_edp = hsw_ddi_translations_dp; ddi_translations_hdmi = hsw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi) / 2; hdmi_800mV_0dB = 6; } else { WARN(1, "ddi translation table missing\n"); @@ -184,7 +184,7 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi) / 2; hdmi_800mV_0dB = 7; } -- GitLab From dc8cd1e790081a31ba4d86c3c0812c348eeec7fc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Aug 2014 17:37:22 +0100 Subject: [PATCH 0093/1868] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt If an object is not bound into the global GTT, then it cannot be accessed via the GTT. This restores the original code that was muddled by ppGTT. 
In the process, we remove a WARN that had long outlived its usefulness and was simply being coded around instead. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 20743bd421f9..1be7e541a7c7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3592,11 +3592,12 @@ int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); uint32_t old_write_domain, old_read_domains; int ret; /* Not valid to be called on unbound objects. */ - if (!i915_gem_obj_bound_any(obj)) + if (vma == NULL) return -EINVAL; if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) @@ -3638,13 +3639,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) old_write_domain); /* And bump the LRU for this access */ - if (i915_gem_object_is_inactive(obj)) { - struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); - if (vma) - list_move_tail(&vma->mm_list, - &dev_priv->gtt.base.inactive_list); - - } + if (i915_gem_object_is_inactive(obj)) + list_move_tail(&vma->mm_list, + &dev_priv->gtt.base.inactive_list); return 0; } @@ -3808,9 +3805,6 @@ static bool is_pin_display(struct drm_i915_gem_object *obj) { struct i915_vma *vma; - if (list_empty(&obj->vma_list)) - return false; - vma = i915_gem_obj_to_ggtt(obj); if (!vma) return false; @@ -5253,12 +5247,6 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) { struct i915_vma *vma; - /* This WARN has probably outlived its usefulness (callers already - * WARN if they don't find the GGTT vma they expect). 
When removing, - * remember to remove the pre-check in is_pin_display() as well */ - if (WARN_ON(list_empty(&obj->vma_list))) - return NULL; - vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link); if (vma->vm != obj_to_ggtt(obj)) return NULL; -- GitLab From e6a844687cf929ec053c7578d5ecc794a8a6c5cf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Aug 2014 12:00:12 +0200 Subject: [PATCH 0094/1868] drm/i915: Force CPU relocations if not GTT mapped Move the decision on whether we need to have a mappable object during execbuffer to the fore and then reuse that decision by propagating the flag through to reservation. As a corollary, before doing the actual relocation through the GTT, we can make sure that we do have a GTT mapping through which to operate. Note that the key to make this work is to ditch the obj->map_and_fenceable unbind optimization - with full ppgtt it doesn't make a lot of sense any more anyway. v2: Revamp and resend to ease future patches. v3: Refresh patch rationale References: https://bugs.freedesktop.org/show_bug.cgi?id=81094 Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Daniel Vetter [danvet: Explain why obj->map_and_fenceable is key and split out the secure batch fix.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 8 +-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 59 +++++++++++----------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1be7e541a7c7..1ca2231e3929 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2928,9 +2928,8 @@ int i915_vma_unbind(struct i915_vma *vma) vma->unbind_vma(vma); list_del_init(&vma->mm_list); - /* Avoid an unnecessary call to unbind on rebind. 
*/ if (i915_is_ggtt(vma->vm)) - obj->map_and_fenceable = true; + obj->map_and_fenceable = false; drm_mm_remove_node(&vma->node); i915_gem_vma_destroy(vma); @@ -3282,6 +3281,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) return 0; } } else if (enable) { + if (WARN_ON(!obj->map_and_fenceable)) + return -EINVAL; + reg = i915_find_fence_reg(dev); if (IS_ERR(reg)) return PTR_ERR(reg); @@ -4331,8 +4333,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, obj->fence_reg = I915_FENCE_REG_NONE; obj->madv = I915_MADV_WILLNEED; - /* Avoid an unnecessary call to unbind on the first bind. */ - obj->map_and_fenceable = true; i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 60998fc4e5b2..6320a385841b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -35,6 +35,7 @@ #define __EXEC_OBJECT_HAS_PIN (1<<31) #define __EXEC_OBJECT_HAS_FENCE (1<<30) +#define __EXEC_OBJECT_NEEDS_MAP (1<<29) #define __EXEC_OBJECT_NEEDS_BIAS (1<<28) #define BATCH_OFFSET_BIAS (256*1024) @@ -534,14 +535,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb) return ret; } -static int -need_reloc_mappable(struct i915_vma *vma) -{ - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - return entry->relocation_count && !use_cpu_reloc(vma->obj) && - i915_is_ggtt(vma->vm); -} - static int i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct intel_engine_cs *ring, @@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; - bool need_fence; uint64_t flags; int ret; flags = 0; - - need_fence = - has_fenced_gpu_access && - entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; - if 
(need_fence || need_reloc_mappable(vma)) + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) flags |= PIN_MAPPABLE; - if (entry->flags & EXEC_OBJECT_NEEDS_GTT) flags |= PIN_GLOBAL; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) @@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, } static bool -eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access) +need_reloc_mappable(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - struct drm_i915_gem_object *obj = vma->obj; - bool need_fence, need_mappable; - need_fence = - has_fenced_gpu_access && - entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(vma); + if (entry->relocation_count == 0) + return false; + + if (!i915_is_ggtt(vma->vm)) + return false; + + /* See also use_cpu_reloc() */ + if (HAS_LLC(vma->obj->base.dev)) + return false; + + if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return false; - WARN_ON((need_mappable || need_fence) && + return true; +} + +static bool +eb_vma_misplaced(struct i915_vma *vma) +{ + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; + struct drm_i915_gem_object *obj = vma->obj; + + WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !i915_is_ggtt(vma->vm)); if (entry->alignment && vma->node.start & (entry->alignment - 1)) return true; - if (need_mappable && !obj->map_and_fenceable) + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) return true; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && @@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(vma); - if (need_mappable) + if (need_mappable) { + entry->flags |= __EXEC_OBJECT_NEEDS_MAP; list_move(&vma->exec_list, &ordered_vmas); - else + } else list_move_tail(&vma->exec_list, &ordered_vmas); obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & 
~I915_GEM_DOMAIN_COMMAND; @@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, if (!drm_mm_node_allocated(&vma->node)) continue; - if (eb_vma_misplaced(vma, has_fenced_gpu_access)) + if (eb_vma_misplaced(vma)) ret = i915_vma_unbind(vma); else ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); -- GitLab From 82b6b6d786466e705e7244cc676189ce47a9199a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Aug 2014 17:37:24 +0100 Subject: [PATCH 0095/1868] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access This migrates the fence tracking onto the existing seqno infrastructure so that the later conversion to tracking via requests is simplified. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 7 ----- drivers/gpu/drm/i915/i915_gem.c | 17 ----------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 34 ++++++++++++---------- drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- 4 files changed, 20 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fab97bc3215f..5c3f033ff928 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1777,13 +1777,6 @@ struct drm_i915_gem_object { * Only honoured if hardware has relevant pte bit */ unsigned long gt_ro:1; - - /* - * Is the GPU currently using a fence to access this buffer, - */ - unsigned int pending_fenced_gpu_access:1; - unsigned int fenced_gpu_access:1; - unsigned int cache_level:3; unsigned int has_aliasing_ppgtt_mapping:1; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1ca2231e3929..3eec344bdac0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2161,8 +2161,6 @@ static void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, struct intel_engine_cs *ring) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; u32 
seqno = intel_ring_get_seqno(ring); BUG_ON(ring == NULL); @@ -2181,19 +2179,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, list_move_tail(&obj->ring_list, &ring->active_list); obj->last_read_seqno = seqno; - - if (obj->fenced_gpu_access) { - obj->last_fenced_seqno = seqno; - - /* Bump MRU to take account of the delayed flush */ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - struct drm_i915_fence_reg *reg; - - reg = &dev_priv->fence_regs[obj->fence_reg]; - list_move_tail(&reg->lru_list, - &dev_priv->mm.fence_list); - } - } } void i915_vma_move_to_active(struct i915_vma *vma, @@ -2229,7 +2214,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) obj->base.write_domain = 0; obj->last_fenced_seqno = 0; - obj->fenced_gpu_access = false; obj->active = 0; drm_gem_object_unreference(&obj->base); @@ -3174,7 +3158,6 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) obj->last_fenced_seqno = 0; } - obj->fenced_gpu_access = false; return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6320a385841b..70946c551e5d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -542,7 +542,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, { struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; uint64_t flags; int ret; @@ -560,17 +559,13 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, entry->flags |= __EXEC_OBJECT_HAS_PIN; - if (has_fenced_gpu_access) { - if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - ret = i915_gem_object_get_fence(obj); - if (ret) - return ret; - - if (i915_gem_object_pin_fence(obj)) - entry->flags |= __EXEC_OBJECT_HAS_FENCE; + if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { + ret = i915_gem_object_get_fence(obj); + if (ret) + return ret; - obj->pending_fenced_gpu_access = true; -
} + if (i915_gem_object_pin_fence(obj)) + entry->flags |= __EXEC_OBJECT_HAS_FENCE; } if (entry->offset != vma->node.start) { @@ -658,8 +653,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj = vma->obj; entry = vma->exec_entry; + if (!has_fenced_gpu_access) + entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; need_fence = - has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(vma); @@ -672,7 +668,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; obj->base.pending_write_domain = 0; - obj->pending_fenced_gpu_access = false; } list_splice(&ordered_vmas, vmas); @@ -959,9 +954,11 @@ static void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct intel_engine_cs *ring) { + u32 seqno = intel_ring_get_seqno(ring); struct i915_vma *vma; list_for_each_entry(vma, vmas, exec_list) { + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; struct drm_i915_gem_object *obj = vma->obj; u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; @@ -970,18 +967,25 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, if (obj->base.write_domain == 0) obj->base.pending_read_domains |= obj->base.read_domains; obj->base.read_domains = obj->base.pending_read_domains; - obj->fenced_gpu_access = obj->pending_fenced_gpu_access; i915_vma_move_to_active(vma, ring); if (obj->base.write_domain) { obj->dirty = 1; - obj->last_write_seqno = intel_ring_get_seqno(ring); + obj->last_write_seqno = seqno; intel_fb_obj_invalidate(obj, ring); /* update for the implicit flush after a batch */ obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; } + if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { + obj->last_fenced_seqno = seqno; + if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { + struct drm_i915_private *dev_priv = to_i915(ring->dev); + 
list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, + &dev_priv->mm.fence_list); + } + } trace_i915_gem_object_change_domain(obj, old_read, old_write); } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index cb150e8b4336..7e623bf097a1 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -376,7 +376,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, if (ret == 0) { obj->fence_dirty = - obj->fenced_gpu_access || + obj->last_fenced_seqno || obj->fence_reg != I915_FENCE_REG_NONE; obj->tiling_mode = args->tiling_mode; -- GitLab From 87f1f46514babd40fc3551ca2d6148cdedd9c7e3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Aug 2014 19:18:42 +0100 Subject: [PATCH 0096/1868] drm/i915: Copy PCI device id into the device info block This is so that we can make the drm_i915_private->info always the preferred source for chipset type and feature queries. Signed-off-by: Chris Wilson Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 5 ++-- drivers/gpu/drm/i915/i915_drv.h | 50 +++++++++++++++++---------------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index c965698a8bac..1867e2619e73 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1603,9 +1603,10 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) dev->dev_private = dev_priv; dev_priv->dev = dev; - /* copy initial configuration to dev_priv->info */ + /* Setup the write-once "constant" device info */ device_info = (struct intel_device_info *)&dev_priv->info; - *device_info = *info; + memcpy(device_info, info, sizeof(dev_priv->info)); + device_info->device_id = dev->pdev->device; spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h index 5c3f033ff928..8a55f07d80cb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -558,6 +558,7 @@ struct intel_uncore { struct intel_device_info { u32 display_mmio_offset; + u16 device_id; u8 num_pipes:3; u8 num_sprites[I915_MAX_PIPES]; u8 gen; @@ -1980,51 +1981,52 @@ struct drm_i915_cmd_table { int count; }; -#define INTEL_INFO(dev) (&to_i915(dev)->info) +#define INTEL_INFO(p) (&to_i915(p)->info) +#define INTEL_DEVID(p) (INTEL_INFO(p)->device_id) -#define IS_I830(dev) ((dev)->pdev->device == 0x3577) -#define IS_845G(dev) ((dev)->pdev->device == 0x2562) +#define IS_I830(dev) (INTEL_DEVID(dev) == 0x3577) +#define IS_845G(dev) (INTEL_DEVID(dev) == 0x2562) #define IS_I85X(dev) (INTEL_INFO(dev)->is_i85x) -#define IS_I865G(dev) ((dev)->pdev->device == 0x2572) +#define IS_I865G(dev) (INTEL_DEVID(dev) == 0x2572) #define IS_I915G(dev) (INTEL_INFO(dev)->is_i915g) -#define IS_I915GM(dev) ((dev)->pdev->device == 0x2592) -#define IS_I945G(dev) ((dev)->pdev->device == 0x2772) +#define IS_I915GM(dev) (INTEL_DEVID(dev) == 0x2592) +#define IS_I945G(dev) (INTEL_DEVID(dev) == 0x2772) #define IS_I945GM(dev) (INTEL_INFO(dev)->is_i945gm) #define IS_BROADWATER(dev) (INTEL_INFO(dev)->is_broadwater) #define IS_CRESTLINE(dev) (INTEL_INFO(dev)->is_crestline) -#define IS_GM45(dev) ((dev)->pdev->device == 0x2A42) +#define IS_GM45(dev) (INTEL_DEVID(dev) == 0x2A42) #define IS_G4X(dev) (INTEL_INFO(dev)->is_g4x) -#define IS_PINEVIEW_G(dev) ((dev)->pdev->device == 0xa001) -#define IS_PINEVIEW_M(dev) ((dev)->pdev->device == 0xa011) +#define IS_PINEVIEW_G(dev) (INTEL_DEVID(dev) == 0xa001) +#define IS_PINEVIEW_M(dev) (INTEL_DEVID(dev) == 0xa011) #define IS_PINEVIEW(dev) (INTEL_INFO(dev)->is_pineview) #define IS_G33(dev) (INTEL_INFO(dev)->is_g33) -#define IS_IRONLAKE_M(dev) ((dev)->pdev->device == 0x0046) +#define IS_IRONLAKE_M(dev) (INTEL_DEVID(dev) == 0x0046) #define IS_IVYBRIDGE(dev) (INTEL_INFO(dev)->is_ivybridge) -#define 
IS_IVB_GT1(dev) ((dev)->pdev->device == 0x0156 || \ - (dev)->pdev->device == 0x0152 || \ - (dev)->pdev->device == 0x015a) -#define IS_SNB_GT1(dev) ((dev)->pdev->device == 0x0102 || \ - (dev)->pdev->device == 0x0106 || \ - (dev)->pdev->device == 0x010A) +#define IS_IVB_GT1(dev) (INTEL_DEVID(dev) == 0x0156 || \ + INTEL_DEVID(dev) == 0x0152 || \ + INTEL_DEVID(dev) == 0x015a) +#define IS_SNB_GT1(dev) (INTEL_DEVID(dev) == 0x0102 || \ + INTEL_DEVID(dev) == 0x0106 || \ + INTEL_DEVID(dev) == 0x010A) #define IS_VALLEYVIEW(dev) (INTEL_INFO(dev)->is_valleyview) #define IS_CHERRYVIEW(dev) (INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev)) #define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell) #define IS_BROADWELL(dev) (!INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev)) #define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile) #define IS_HSW_EARLY_SDV(dev) (IS_HASWELL(dev) && \ - ((dev)->pdev->device & 0xFF00) == 0x0C00) + (INTEL_DEVID(dev) & 0xFF00) == 0x0C00) #define IS_BDW_ULT(dev) (IS_BROADWELL(dev) && \ - (((dev)->pdev->device & 0xf) == 0x2 || \ - ((dev)->pdev->device & 0xf) == 0x6 || \ - ((dev)->pdev->device & 0xf) == 0xe)) + ((INTEL_DEVID(dev) & 0xf) == 0x2 || \ + (INTEL_DEVID(dev) & 0xf) == 0x6 || \ + (INTEL_DEVID(dev) & 0xf) == 0xe)) #define IS_HSW_ULT(dev) (IS_HASWELL(dev) && \ - ((dev)->pdev->device & 0xFF00) == 0x0A00) + (INTEL_DEVID(dev) & 0xFF00) == 0x0A00) #define IS_ULT(dev) (IS_HSW_ULT(dev) || IS_BDW_ULT(dev)) #define IS_HSW_GT3(dev) (IS_HASWELL(dev) && \ - ((dev)->pdev->device & 0x00F0) == 0x0020) + (INTEL_DEVID(dev) & 0x00F0) == 0x0020) /* ULX machines are also considered ULT. 
*/ -#define IS_HSW_ULX(dev) ((dev)->pdev->device == 0x0A0E || \ - (dev)->pdev->device == 0x0A1E) +#define IS_HSW_ULX(dev) (INTEL_DEVID(dev) == 0x0A0E || \ + INTEL_DEVID(dev) == 0x0A1E) #define IS_PRELIMINARY_HW(intel_info) ((intel_info)->is_preliminary) /* -- GitLab From f6daaec29b2a201eb8db2ce26b4460b779ad8111 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 9 Aug 2014 23:00:56 +0100 Subject: [PATCH 0097/1868] drm/i915: Make intel_disable_shared_dpll() static Found with sparse. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0746590ed4e3..245cf4128314 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1797,7 +1797,7 @@ static void intel_enable_shared_dpll(struct intel_crtc *crtc) pll->on = true; } -void intel_disable_shared_dpll(struct intel_crtc *crtc) +static void intel_disable_shared_dpll(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; -- GitLab From 1bee20175f27b46427f10290fdd4a79334d41a60 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 9 Aug 2014 23:00:58 +0100 Subject: [PATCH 0098/1868] drm/i915: Remove set but unused 'gt_perf_status' Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 41de760bf1d4..12f4e143328c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3719,7 +3719,6 @@ static void gen6_enable_rps(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; u32 rp_state_cap; - u32 gt_perf_status; u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; u32 gtfifodbg; int rc6_mode; @@ 
-3744,7 +3743,6 @@ static void gen6_enable_rps(struct drm_device *dev) gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS); parse_rp_state_cap(dev_priv, rp_state_cap); -- GitLab From 9bec9b1334d687c0a9fcf3d3a1987a61b4826a45 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Aug 2014 09:21:35 +0100 Subject: [PATCH 0099/1868] drm/i915: Double check ring is idle before declaring the GPU wedged During ring initialisation, sometimes we observe, though not in production hardware, that the idle flag is not set even though the ring is empty. Double check before giving up. Signed-off-by: Chris Wilson Cc: Damien Lespiau Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 117543e58d48..a059b64a0fb2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -478,7 +478,12 @@ static bool stop_ring(struct intel_engine_cs *ring) I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { DRM_ERROR("%s : timed out trying to stop ring\n", ring->name); - return false; + /* Sometimes we observe that the idle flag is not + * set even though the ring is empty. So double + * check before giving up. + */ + if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring)) + return false; } } -- GitLab From dbbe91279511d6a18a521b953a3c139e4787e660 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Aug 2014 19:18:43 +0100 Subject: [PATCH 0100/1868] drm/i915: Agnostic INTEL_INFO Adapt the macro so that we can pass either the struct drm_device or the struct drm_i915_private pointers and get the answer we want. 
Over time, my plan is to convert all users over to using drm_i915_private and so trimming down the pointer dance. Having spent a few hours chasing that goal and achieved over 8k of object code saving, it appears to be a worthwhile target. This interim macro allows us to slowly convert over. Signed-off-by: Chris Wilson [danvet: Drop the (struct drm_device *) cast per the m-l discussion. Also explain the seemingly unnecessary first cast.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 1867e2619e73..1763fbf34e1d 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1596,6 +1596,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!drm_core_check_feature(dev, DRIVER_MODESET) && !dev->agp) return -EINVAL; + /* For the ugly agnostic INTEL_INFO macro */ + BUILD_BUG_ON(sizeof(*dev_priv) == sizeof(*dev)); + dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); if (dev_priv == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8a55f07d80cb..6959c1b2d648 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1981,7 +1981,10 @@ struct drm_i915_cmd_table { int count; }; -#define INTEL_INFO(p) (&to_i915(p)->info) +/* Note that the (struct drm_i915_private *) cast is just to shut up gcc. */ +#define __I915__(p) ((sizeof(*(p)) == sizeof(struct drm_i915_private)) ? 
\ + (struct drm_i915_private *)(p) : to_i915(p)) +#define INTEL_INFO(p) (&__I915__(p)->info) #define INTEL_DEVID(p) (INTEL_INFO(p)->device_id) #define IS_I830(dev) (INTEL_DEVID(dev) == 0x3577) -- GitLab From da51a1e7e398129d9fddd4b26b8469145dd4fd08 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 11 Aug 2014 12:08:58 +0200 Subject: [PATCH 0101/1868] drm/i915: Fix secure dispatch with full ppgtt Based upon a hunk from a patch from Chris Wilson, but augmented to: - Process the batch in the full ppgtt vm so that self-relocations match again with userspace's expectations.. - Add a comment why plain pin for the global gtt binding is safe at that point. v2: Drop local bind_vm variable (Chris). v3: Explain why this works despite the lack of proper active tracking for the ggtt batch vma. Cc: Chris Wilson Cc: Ben Widawsky Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 48 +++++++++++----------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 70946c551e5d..e1eac15b2583 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -95,7 +95,6 @@ eb_lookup_vmas(struct eb_vmas *eb, struct i915_address_space *vm, struct drm_file *file) { - struct drm_i915_private *dev_priv = vm->dev->dev_private; struct drm_i915_gem_object *obj; struct list_head objects; int i, ret; @@ -130,7 +129,6 @@ eb_lookup_vmas(struct eb_vmas *eb, i = 0; while (!list_empty(&objects)) { struct i915_vma *vma; - struct i915_address_space *bind_vm = vm; if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT && USES_FULL_PPGTT(vm->dev)) { @@ -138,13 +136,6 @@ eb_lookup_vmas(struct eb_vmas *eb, goto err; } - /* If we have secure dispatch, or the userspace assures us that - * they know what they're doing, use the GGTT VM. 
- */ - if (((args->flags & I915_EXEC_SECURE) && - (i == (args->buffer_count - 1)))) - bind_vm = &dev_priv->gtt.base; - obj = list_first_entry(&objects, struct drm_i915_gem_object, obj_exec_link); @@ -157,7 +148,7 @@ eb_lookup_vmas(struct eb_vmas *eb, * from the (obj, vm) we don't run the risk of creating * duplicated vmas for the same vm. */ - vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm); + vma = i915_gem_obj_lookup_or_create_vma(obj, vm); if (IS_ERR(vma)) { DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); @@ -1391,25 +1382,36 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ - if (flags & I915_DISPATCH_SECURE && - !batch_obj->has_global_gtt_mapping) { - /* When we have multiple VMs, we'll need to make sure that we - * allocate space first */ - struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj); - BUG_ON(!vma); - vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND); - } + if (flags & I915_DISPATCH_SECURE) { + /* + * So on first glance it looks freaky that we pin the batch here + * outside of the reservation loop. But: + * - The batch is already pinned into the relevant ppgtt, so we + * already have the backing storage fully allocated. + * - No other BO uses the global gtt (well contexts, but meh), + * so we don't really have issues with mutliple objects not + * fitting due to fragmentation. + * So this is actually safe. 
+ */ + ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0); + if (ret) + goto err; - if (flags & I915_DISPATCH_SECURE) exec_start += i915_gem_obj_ggtt_offset(batch_obj); - else + } else exec_start += i915_gem_obj_offset(batch_obj, vm); ret = legacy_ringbuffer_submission(dev, file, ring, ctx, - args, &eb->vmas, batch_obj, exec_start, flags); - if (ret) - goto err; + args, &eb->vmas, batch_obj, exec_start, flags); + /* + * FIXME: We crucially rely upon the active tracking for the (ppgtt) + * batch vma for correctness. For less ugly and less fragility this + * needs to be adjusted to also track the ggtt batch vma properly as + * active. + */ + if (flags & I915_DISPATCH_SECURE) + i915_gem_object_ggtt_unpin(batch_obj); err: /* the request owns the ref now */ i915_gem_context_unreference(ctx); -- GitLab From ad19f10bc2a5964f1564639e60953de76b7e50f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 10 Aug 2014 06:29:08 +0100 Subject: [PATCH 0102/1868] drm/i915: Pre-validate the NEED_GTTS flag for execbuffer We have an implementation requirement that precludes the user from requesting a ggtt entry when the device is operating in ppgtt mode. Move the current check from inside the execbuffer object collation to the prevalidation phase. 
v2: Roll both invalid flags checks into one Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e1eac15b2583..446f4b6fbefe 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -130,12 +130,6 @@ eb_lookup_vmas(struct eb_vmas *eb, while (!list_empty(&objects)) { struct i915_vma *vma; - if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT && - USES_FULL_PPGTT(vm->dev)) { - ret = -EINVAL; - goto err; - } - obj = list_first_entry(&objects, struct drm_i915_gem_object, obj_exec_link); @@ -877,18 +871,24 @@ i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) } static int -validate_exec_list(struct drm_i915_gem_exec_object2 *exec, +validate_exec_list(struct drm_device *dev, + struct drm_i915_gem_exec_object2 *exec, int count) { - int i; unsigned relocs_total = 0; unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry); + unsigned invalid_flags; + int i; + + invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; + if (USES_FULL_PPGTT(dev)) + invalid_flags |= EXEC_OBJECT_NEEDS_GTT; for (i = 0; i < count; i++) { char __user *ptr = to_user_ptr(exec[i].relocs_ptr); int length; /* limited by fault_in_pages_readable() */ - if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS) + if (exec[i].flags & invalid_flags) return -EINVAL; /* First check for malicious input causing overflow in @@ -1250,7 +1250,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (!i915_gem_check_execbuffer(args)) return -EINVAL; - ret = validate_exec_list(exec, args->buffer_count); + ret = validate_exec_list(dev, exec, args->buffer_count); if (ret) return ret; -- GitLab From 060e82c6f4ccf678decffb28ba8301ca9220a995 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 10 Aug 2014 06:29:10 +0100 
Subject: [PATCH 0103/1868] drm/i915: Remove redundant list_empty(eb->vmas) tests in execbuffer Part of the pre-validation for an execbuffer call is that there is at least one object in the execlist. As we bail if we fail to lookup any object, we can be sure that after the eb_lookup_vma() there is at least one object in the vma list and so we do not need to assert. Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Daniel Vetter Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 446f4b6fbefe..6c07940b468c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -622,9 +622,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; int retry; - if (list_empty(vmas)) - return 0; - i915_gem_retire_requests_ring(ring); vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; @@ -725,9 +722,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, int i, total, ret; unsigned count = args->buffer_count; - if (WARN_ON(list_empty(&eb->vmas))) - return 0; - vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm; /* We may process another execbuffer during the unlock... */ -- GitLab From 906843c3a1acc36407e500a073679c4207d307cd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 10 Aug 2014 06:29:11 +0100 Subject: [PATCH 0104/1868] drm/i915: Simplify relocate_entry_gtt() and make 64-bit safe Even though we should not try to use 4+GiB GTTs on 32-bit systems, by using a local variable we can future proof the code whilst making it easier to read. Signed-off-by: Chris Wilson [danvet: Appease checkpatch a bit.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++----------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6c07940b468c..dec2cc2fbd42 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -293,7 +293,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; uint64_t delta = reloc->delta + target_offset; - uint32_t __iomem *reloc_entry; + uint64_t offset; void __iomem *reloc_page; int ret; @@ -306,25 +306,24 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, return ret; /* Map the page containing the relocation we're going to perform. */ - reloc->offset += i915_gem_obj_ggtt_offset(obj); + offset = i915_gem_obj_ggtt_offset(obj); + offset += reloc->offset; reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, - reloc->offset & PAGE_MASK); - reloc_entry = (uint32_t __iomem *) - (reloc_page + offset_in_page(reloc->offset)); - iowrite32(lower_32_bits(delta), reloc_entry); + offset & PAGE_MASK); + iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset)); if (INTEL_INFO(dev)->gen >= 8) { - reloc_entry += 1; + offset += sizeof(uint32_t); - if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) { + if (offset_in_page(offset) == 0) { io_mapping_unmap_atomic(reloc_page); - reloc_page = io_mapping_map_atomic_wc( - dev_priv->gtt.mappable, - reloc->offset + sizeof(uint32_t)); - reloc_entry = reloc_page; + reloc_page = + io_mapping_map_atomic_wc(dev_priv->gtt.mappable, + offset); } - iowrite32(upper_32_bits(delta), reloc_entry); + iowrite32(upper_32_bits(delta), + reloc_page + offset_in_page(offset)); } io_mapping_unmap_atomic(reloc_page); -- GitLab From 2a0d7cfd9482ca4c10a4d8794791760a6a7ce40c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 29 Jul 
2014 15:32:37 +0200 Subject: [PATCH 0105/1868] drm: Add a plane->reset hook In general having this can't hurt, and the atomic helpers will need it to be able to reset the state objects properly. The overall idea is to reset in the order pixels flow, so planes -> crtcs -> encoders -> connectors. v2: Squash in fixup from Ville to correctly dereference struct drm_plane instead of drm_crtc when walking the plane list. Fixes an oops in driver init and resume. Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 5 +++++ include/drm/drm_crtc.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index cacb460a7145..285e62a134b2 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -4663,9 +4663,14 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, void drm_mode_config_reset(struct drm_device *dev) { struct drm_crtc *crtc; + struct drm_plane *plane; struct drm_encoder *encoder; struct drm_connector *connector; + list_for_each_entry(plane, &dev->mode_config.plane_list, head) + if (plane->funcs->reset) + plane->funcs->reset(plane); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) if (crtc->funcs->reset) crtc->funcs->reset(crtc); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 279565aa0c33..2c1f58d6957a 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -581,6 +581,7 @@ struct drm_plane_funcs { uint32_t src_w, uint32_t src_h); int (*disable_plane)(struct drm_plane *plane); void (*destroy)(struct drm_plane *plane); + void (*reset)(struct drm_plane *plane); int (*set_property)(struct drm_plane *plane, struct drm_property *property, uint64_t val); -- GitLab From e8450f51a4b39cfe0878b4aee339820b2bfff240 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 25 Jul 2014 23:34:03 +0200 Subject: [PATCH 0106/1868] drm/irq: Implement a generic vblank_wait function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit As usual in both a crtc index and a struct drm_crtc * version. The function assumes that no one drives their display below 10Hz, and it will complain if the vblank wait takes longer than that. v2: Also check dev->max_vblank_counter since some drivers register a fake get_vblank_counter function. v3: Use drm_vblank_count instead of calling the low-level ->get_vblank_counter callback. That way we'll get the sw-cooked counter for platforms without proper vblank support and so can ditch the max_vblank_counter check again. v4: Review from Michel Dänzer: - Restore lost notes about v3: - Spelling in kerneldoc. - Inline wait_event condition. - s/vblank_wait/wait_one_vblank/ Cc: Michel Dänzer Cc: Ville Syrjälä Reviewed-by: Michel Dänzer Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 44 +++++++++++++++++++++++++++++++++++++++ include/drm/drmP.h | 2 ++ 2 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 6f16a104d6d0..e64d24951fc2 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1009,6 +1009,50 @@ void drm_crtc_vblank_put(struct drm_crtc *crtc) } EXPORT_SYMBOL(drm_crtc_vblank_put); +/** + * drm_wait_one_vblank - wait for one vblank + * @dev: DRM device + * @crtc: crtc index + * + * This waits for one vblank to pass on @crtc, using the irq driver interfaces. + * It is a failure to call this when the vblank irq for @crtc is disabled, e.g. + * due to lack of driver support or because the crtc is off. 
+ */ +void drm_wait_one_vblank(struct drm_device *dev, int crtc) +{ + int ret; + u32 last; + + ret = drm_vblank_get(dev, crtc); + if (WARN_ON(ret)) + return; + + last = drm_vblank_count(dev, crtc); + + ret = wait_event_timeout(dev->vblank[crtc].queue, + last != drm_vblank_count(dev, crtc), + msecs_to_jiffies(100)); + + WARN_ON(ret == 0); + + drm_vblank_put(dev, crtc); +} +EXPORT_SYMBOL(drm_wait_one_vblank); + +/** + * drm_crtc_wait_one_vblank - wait for one vblank + * @crtc: DRM crtc + * + * This waits for one vblank to pass on @crtc, using the irq driver interfaces. + * It is a failure to call this when the vblank irq for @crtc is disabled, e.g. + * due to lack of driver support or because the crtc is off. + */ +void drm_crtc_wait_one_vblank(struct drm_crtc *crtc) +{ + drm_wait_one_vblank(crtc->dev, drm_crtc_index(crtc)); +} +EXPORT_SYMBOL(drm_crtc_wait_one_vblank); + /** * drm_vblank_off - disable vblank events on a CRTC * @dev: DRM device diff --git a/include/drm/drmP.h b/include/drm/drmP.h index d3d9be6b83ef..c2209178981f 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1327,6 +1327,8 @@ extern int drm_vblank_get(struct drm_device *dev, int crtc); extern void drm_vblank_put(struct drm_device *dev, int crtc); extern int drm_crtc_vblank_get(struct drm_crtc *crtc); extern void drm_crtc_vblank_put(struct drm_crtc *crtc); +extern void drm_wait_one_vblank(struct drm_device *dev, int crtc); +extern void drm_crtc_wait_one_vblank(struct drm_crtc *crtc); extern void drm_vblank_off(struct drm_device *dev, int crtc); extern void drm_vblank_on(struct drm_device *dev, int crtc); extern void drm_crtc_vblank_off(struct drm_crtc *crtc); -- GitLab From b20385f1f8434ec32d73414ffcadb7dcbd3a2a61 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:10 +0100 Subject: [PATCH 0107/1868] drm/i915/bdw: New source and header file for LRs, LRCs and Execlists Some legacy HW context code assumptions don't make sense for this new submission method, so we will 
place this stuff in a separate file. Note for reviewers: I've carefully considered the best name for this file and this was my best option (other possibilities were intel_lr_context.c or intel_execlist.c). I am open to a certain bikeshedding on this matter, anyway. At some point in time, it would be a good idea to split intel_lrc.c/.h even further, but for the moment just shove everything together. v2: Change to intel_lrc.c v3: Squash together with the header file addition Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 42 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 27 ++++++++++++++++++++ 4 files changed, 71 insertions(+) create mode 100644 drivers/gpu/drm/i915/intel_lrc.c create mode 100644 drivers/gpu/drm/i915/intel_lrc.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 91bd167e1cb7..c1dd485aeb6c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -31,6 +31,7 @@ i915-y += i915_cmd_parser.o \ i915_gpu_error.o \ i915_irq.o \ i915_trace_points.o \ + intel_lrc.o \ intel_ringbuffer.o \ intel_uncore.o diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6959c1b2d648..ec2a094f3622 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -35,6 +35,7 @@ #include "i915_reg.h" #include "intel_bios.h" #include "intel_ringbuffer.h" +#include "intel_lrc.h" #include "i915_gem_gtt.h" #include #include diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c new file mode 100644 index 000000000000..49bb6fcdface --- /dev/null +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -0,0 +1,42 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files 
(the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Ben Widawsky + * Michel Thierry + * Thomas Daniel + * Oscar Mateo + * + */ + +/* + * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". + * These expanded contexts enable a number of new abilities, especially + * "Execlists" (also implemented in this file). + * + * Execlists are the new method by which, on gen8+ hardware, workloads are + * submitted for execution (as opposed to the legacy, ringbuffer-based, method). 
+ */ + +#include +#include +#include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h new file mode 100644 index 000000000000..f6830a4ec773 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -0,0 +1,27 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _INTEL_LRC_H_ +#define _INTEL_LRC_H_ + +#endif /* _INTEL_LRC_H_ */ -- GitLab From 127f100369a1f302904335950387d566680eb275 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:11 +0100 Subject: [PATCH 0108/1868] drm/i915/bdw: Macro for LRCs and module option for Execlists GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". These expanded contexts enable a number of new abilities, especially "Execlists". The macro is defined to off until we have things in place to hope to work. 
v2: Rename "advanced contexts" to the more correct "logical ring contexts". v3: Add a module parameter to enable execlists. Execlist are relatively new, and so it'd be wise to be able to switch back to ring submission to debug subtle problems that will inevitably arise. v4: Add an intel_enable_execlists function. v5: Sanitize early, as suggested by Daniel. Remove lrc_enabled. Signed-off-by: Ben Widawsky (v1) Signed-off-by: Damien Lespiau (v3) Signed-off-by: Oscar Mateo (v2, v4 & v5) Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 3 +++ drivers/gpu/drm/i915/i915_params.c | 6 ++++++ drivers/gpu/drm/i915/intel_lrc.c | 11 +++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 3 +++ 5 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ec2a094f3622..fd2aa15ce02b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2062,6 +2062,7 @@ struct drm_i915_cmd_table { #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) +#define HAS_LOGICAL_RING_CONTEXTS(dev) 0 #define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_PPGTT(dev) (INTEL_INFO(dev)->gen >= 7 && !IS_GEN8(dev)) #define USES_PPGTT(dev) (i915.enable_ppgtt) @@ -2149,6 +2150,7 @@ struct i915_params { int enable_rc6; int enable_fbc; int enable_ppgtt; + int enable_execlists; int enable_psr; unsigned int preliminary_hw_support; int disable_power_well; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3eec344bdac0..5646e9ba6383 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4723,6 +4723,9 @@ int i915_gem_init(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int ret; + i915.enable_execlists = intel_sanitize_enable_execlists(dev, + i915.enable_execlists); + 
mutex_lock(&dev->struct_mutex); if (IS_VALLEYVIEW(dev)) { diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 62ee8308d682..f7f8350c3793 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -35,6 +35,7 @@ struct i915_params i915 __read_mostly = { .vbt_sdvo_panel_type = -1, .enable_rc6 = -1, .enable_fbc = -1, + .enable_execlists = -1, .enable_hangcheck = true, .enable_ppgtt = -1, .enable_psr = 1, @@ -118,6 +119,11 @@ MODULE_PARM_DESC(enable_ppgtt, "Override PPGTT usage. " "(-1=auto [default], 0=disabled, 1=aliasing, 2=full)"); +module_param_named(enable_execlists, i915.enable_execlists, int, 0400); +MODULE_PARM_DESC(enable_execlists, + "Override execlists usage. " + "(-1=auto [default], 0=disabled, 1=enabled)"); + module_param_named(enable_psr, i915.enable_psr, int, 0600); MODULE_PARM_DESC(enable_psr, "Enable PSR (default: true)"); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 49bb6fcdface..21f7f1cce86e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -40,3 +40,14 @@ #include #include #include "i915_drv.h" + +int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) +{ + if (enable_execlists == 0) + return 0; + + if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev)) + return 1; + + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index f6830a4ec773..75ee9c3cb7dc 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -24,4 +24,7 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ +/* Execlists */ +int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); + #endif /* _INTEL_LRC_H_ */ -- GitLab From bd84b1e995918ad83bdba5d5be1bef901e169f19 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 11 Aug 2014 15:57:57 +0200 Subject: [PATCH 0109/1868] drm/i915: WARN if module opt sanitization goes 
out of order Depending upon one module option to be sanitized (through USES_PPGTT) for the other is a bit too fragile for my taste. At least WARN about this. Cc: Ben Widawsky Cc: Damien Lespiau Cc: Oscar Mateo Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 21f7f1cce86e..44721292eb77 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -43,6 +43,8 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { + WARN_ON(i915.enable_ppgtt == -1); + if (enable_execlists == 0) return 0; -- GitLab From ede7d42baeece583c864badb6f9081f4cded6c32 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:12 +0100 Subject: [PATCH 0110/1868] drm/i915/bdw: Initialization for Logical Ring Contexts For the moment this is just a placeholder, but it shows one of the main differences between the good ol' HW contexts and the shiny new Logical Ring Contexts: LR contexts allocate and free their own backing objects. Another difference is that the allocation is deferred (as the create function name suggests), but that does not happen in this patch yet, because for the moment we are only dealing with the default context. Early in the series we had our own gen8_gem_context_init/fini functions, but the truth is they now look almost the same as the legacy hw context init/fini functions. We can always split them later if this ceases to be the case. Also, we do not fall back to legacy ringbuffers when logical ring context initialization fails (not very likely to happen and, even if it does, hw contexts would probably fail as well). v2: Daniel says "explain, do not showcase". Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: s/BUG_ON/WARN_ON/.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 29 ++++++++++++++++++++----- drivers/gpu/drm/i915/intel_lrc.c | 15 +++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 5 +++++ 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3b99390e467a..3552a351ccf7 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -182,7 +182,10 @@ void i915_gem_context_free(struct kref *ctx_ref) typeof(*ctx), ref); struct i915_hw_ppgtt *ppgtt = NULL; - if (ctx->legacy_hw_ctx.rcs_state) { + if (i915.enable_execlists) { + ppgtt = ctx_to_ppgtt(ctx); + intel_lr_context_free(ctx); + } else if (ctx->legacy_hw_ctx.rcs_state) { /* We refcount even the aliasing PPGTT to keep the code symmetric */ if (USES_PPGTT(ctx->legacy_hw_ctx.rcs_state->base.dev)) ppgtt = ctx_to_ppgtt(ctx); @@ -417,7 +420,11 @@ int i915_gem_context_init(struct drm_device *dev) if (WARN_ON(dev_priv->ring[RCS].default_context)) return 0; - if (HAS_HW_CONTEXTS(dev)) { + if (i915.enable_execlists) { + /* NB: intentionally left blank. 
We will allocate our own + * backing objects as we need them, thank you very much */ + dev_priv->hw_context_size = 0; + } else if (HAS_HW_CONTEXTS(dev)) { dev_priv->hw_context_size = round_up(get_context_size(dev), 4096); if (dev_priv->hw_context_size > (1<<20)) { DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n", @@ -433,11 +440,20 @@ int i915_gem_context_init(struct drm_device *dev) return PTR_ERR(ctx); } - /* NB: RCS will hold a ref for all rings */ - for (i = 0; i < I915_NUM_RINGS; i++) - dev_priv->ring[i].default_context = ctx; + for (i = 0; i < I915_NUM_RINGS; i++) { + struct intel_engine_cs *ring = &dev_priv->ring[i]; + + /* NB: RCS will hold a ref for all rings */ + ring->default_context = ctx; + + /* FIXME: we really only want to do this for initialized rings */ + if (i915.enable_execlists) + intel_lr_context_deferred_create(ctx, ring); + } - DRM_DEBUG_DRIVER("%s context support initialized\n", dev_priv->hw_context_size ? "HW" : "fake"); + DRM_DEBUG_DRIVER("%s context support initialized\n", + i915.enable_execlists ? "LR" : + dev_priv->hw_context_size ? 
"HW" : "fake"); return 0; } @@ -779,6 +795,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct intel_context *ctx; int ret; + /* FIXME: allow user-created LR contexts as well */ if (!hw_context_enabled(dev)) return -ENODEV; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 44721292eb77..2d82d52d18bb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -53,3 +53,18 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } + +void intel_lr_context_free(struct intel_context *ctx) +{ + /* TODO */ +} + +int intel_lr_context_deferred_create(struct intel_context *ctx, + struct intel_engine_cs *ring) +{ + WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); + + /* TODO */ + + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 75ee9c3cb7dc..3b93572431e3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -24,6 +24,11 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ +/* Logical Ring Contexts */ +void intel_lr_context_free(struct intel_context *ctx); +int intel_lr_context_deferred_create(struct intel_context *ctx, + struct intel_engine_cs *ring); + /* Execlists */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); -- GitLab From c9e003af2d44d9f6eafe855448c41c9ac08ae895 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:13 +0100 Subject: [PATCH 0111/1868] drm/i915/bdw: Introduce one context backing object per engine A context backing object only makes sense for a given engine (because it holds state data specific to that engine). In legacy ringbuffer submission mode, the only MI_SET_CONTEXT we really perform is for the render engine, so one backing object is all we need. With Execlists, however, we need backing objects for every engine, as contexts become the only way to submit workloads to the GPU. 
To tackle this problem, we multiplex the context struct to contain objects. Originally, I colored this code by instantiating one new context for every engine I wanted to use, but this change suggested by Brad Volkin makes it more elegant. v2: Leave the old backing object pointer behind. Daniel Vetter suggested using a union, but it makes more sense to keep rcs_state as a NULL pointer behind, to make sure no one uses it incorrectly when Execlists are enabled, similar to what he suggested for ring->buffer (Rusty's API level 5). v3: Use the name "state" instead of the too-generic "obj", so that it mirrors the name choice for the legacy rcs_state. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fd2aa15ce02b..ad70e8ec18bc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -626,11 +626,17 @@ struct intel_context { struct i915_ctx_hang_stats hang_stats; struct i915_address_space *vm; + /* Legacy ring buffer submission */ struct { struct drm_i915_gem_object *rcs_state; bool initialized; } legacy_hw_ctx; + /* Execlists */ + struct { + struct drm_i915_gem_object *state; + } engine[I915_NUM_RINGS]; + struct list_head link; }; -- GitLab From 8c8579176a144b1dca1d99ebb92510924168d508 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:14 +0100 Subject: [PATCH 0112/1868] drm/i915/bdw: A bit more advanced LR context alloc/free Now that we have the ability to allocate our own context backing objects and we have multiplexed one of them per engine inside the context structs, we can finally allocate and free them correctly. Regarding the context size, reading the register to calculate the sizes can work, I think, however the docs are very clear about the actual context sizes on GEN8, so just hardcode that and use it. 
v2: Rebased on top of the Full PPGTT series. It is important to notice that at this point we have one global default context per engine, all of them using the aliasing PPGTT (as opposed to the single global default context we have with legacy HW contexts). v3: - Go back to one single global default context, this time with multiple backing objects inside. - Use different context sizes for non-render engines, as suggested by Damien (still hardcoded, since the information about the context size registers in the BSpec is, well, *lacking*). - Render ctx size is 20 (or 19) pages, but not 21 (caught by Damien). - Move default context backing object creation to intel_init_ring (so that we don't waste memory in rings that might not get initialized). v4: - Reuse the HW legacy context init/fini. - Create a separate free function. - Rename the functions with an intel_ prefix. v5: Several rebases to account for the changes in the previous patches. Signed-off-by: Ben Widawsky (v1) Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 59 ++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ad70e8ec18bc..cbae19bab4bf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2528,6 +2528,8 @@ int i915_switch_context(struct intel_engine_cs *ring, struct intel_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); void i915_gem_context_free(struct kref *ctx_ref); +struct drm_i915_gem_object * +i915_gem_alloc_context_obj(struct drm_device *dev, size_t size); static inline void i915_gem_context_reference(struct intel_context *ctx) { kref_get(&ctx->ref); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 
3552a351ccf7..9f8fbbacf6c0 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -199,7 +199,7 @@ void i915_gem_context_free(struct kref *ctx_ref) kfree(ctx); } -static struct drm_i915_gem_object * +struct drm_i915_gem_object * i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2d82d52d18bb..9f30ee80e487 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -41,6 +41,11 @@ #include #include "i915_drv.h" +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) +#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_ALIGN 4096 + int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { WARN_ON(i915.enable_ppgtt == -1); @@ -56,15 +61,65 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists void intel_lr_context_free(struct intel_context *ctx) { - /* TODO */ + int i; + + for (i = 0; i < I915_NUM_RINGS; i++) { + struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; + if (ctx_obj) { + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + } + } +} + +static uint32_t get_lr_context_size(struct intel_engine_cs *ring) +{ + int ret = 0; + + WARN_ON(INTEL_INFO(ring->dev)->gen != 8); + + switch (ring->id) { + case RCS: + ret = GEN8_LR_CONTEXT_RENDER_SIZE; + break; + case VCS: + case BCS: + case VECS: + case VCS2: + ret = GEN8_LR_CONTEXT_OTHER_SIZE; + break; + } + + return ret; } int intel_lr_context_deferred_create(struct intel_context *ctx, struct intel_engine_cs *ring) { + struct drm_device *dev = ring->dev; + struct drm_i915_gem_object *ctx_obj; + uint32_t context_size; + int ret; + WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); - /* TODO */ + context_size = round_up(get_lr_context_size(ring), 4096); + + ctx_obj = i915_gem_alloc_context_obj(dev, 
context_size); + if (IS_ERR(ctx_obj)) { + ret = PTR_ERR(ctx_obj); + DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret); + return ret; + } + + ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0); + if (ret) { + DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret); + drm_gem_object_unreference(&ctx_obj->base); + return ret; + } + + ctx->engine[ring->id].state = ctx_obj; return 0; } -- GitLab From 84c2377fcee7a43cd964b62143e9a3714130bb0c Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:15 +0100 Subject: [PATCH 0113/1868] drm/i915/bdw: Allocate ringbuffers for Logical Ring Contexts As we have said a couple of times by now, logical ring contexts have their own ringbuffers: not only the backing pages, but the whole management struct. In a previous version of the series, this was achieved with two separate patches: drm/i915/bdw: Allocate ringbuffer backing objects for default global LRC drm/i915/bdw: Allocate ringbuffer for user-created LRCs Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 38 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +++ 4 files changed, 46 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cbae19bab4bf..eccb8e406e9c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -635,6 +635,7 @@ struct intel_context { /* Execlists */ struct { struct drm_i915_gem_object *state; + struct intel_ringbuffer *ringbuf; } engine[I915_NUM_RINGS]; struct list_head link; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9f30ee80e487..0c80bb1f5420 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -65,7 +65,11 @@ void intel_lr_context_free(struct intel_context *ctx) for (i = 
0; i < I915_NUM_RINGS; i++) { struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; + struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf; + if (ctx_obj) { + intel_destroy_ringbuffer_obj(ringbuf); + kfree(ringbuf); i915_gem_object_ggtt_unpin(ctx_obj); drm_gem_object_unreference(&ctx_obj->base); } @@ -99,6 +103,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, struct drm_device *dev = ring->dev; struct drm_i915_gem_object *ctx_obj; uint32_t context_size; + struct intel_ringbuffer *ringbuf; int ret; WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); @@ -119,6 +124,39 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, return ret; } + ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL); + if (!ringbuf) { + DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", + ring->name); + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + ret = -ENOMEM; + return ret; + } + + ringbuf->size = 32 * PAGE_SIZE; + ringbuf->effective_size = ringbuf->size; + ringbuf->head = 0; + ringbuf->tail = 0; + ringbuf->space = ringbuf->size; + ringbuf->last_retired_head = -1; + + /* TODO: For now we put this in the mappable region so that we can reuse + * the existing ringbuffer code which ioremaps it. When we start + * creating many contexts, this will no longer work and we must switch + * to a kmapish interface. 
+ */ + ret = intel_alloc_ringbuffer_obj(dev, ringbuf); + if (ret) { + DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n", + ring->name, ret); + kfree(ringbuf); + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + return ret; + } + + ctx->engine[ring->id].ringbuf = ringbuf; ctx->engine[ring->id].state = ctx_obj; return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a059b64a0fb2..064652034d7e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1519,7 +1519,7 @@ static int init_phys_status_page(struct intel_engine_cs *ring) return 0; } -static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) { if (!ringbuf->obj) return; @@ -1530,8 +1530,8 @@ static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) ringbuf->obj = NULL; } -static int intel_alloc_ringbuffer_obj(struct drm_device *dev, - struct intel_ringbuffer *ringbuf) +int intel_alloc_ringbuffer_obj(struct drm_device *dev, + struct intel_ringbuffer *ringbuf) { struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 70525d0c2c74..669cc7527f9a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -355,6 +355,10 @@ intel_write_status_page(struct intel_engine_cs *ring, #define I915_GEM_HWS_SCRATCH_INDEX 0x30 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf); +int intel_alloc_ringbuffer_obj(struct drm_device *dev, + struct intel_ringbuffer *ringbuf); + void intel_stop_ring_buffer(struct intel_engine_cs *ring); void intel_cleanup_ring_buffer(struct intel_engine_cs *ring); -- GitLab From 
0c7dd53b84def4fbbba907bef3d32a5171b617a5 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 11 Aug 2014 16:17:44 +0200 Subject: [PATCH 0114/1868] drm/i915/bdw: Add a context and an engine pointers to the ringbuffer Any given ringbuffer is unequivocally tied to one context and one engine. By setting the appropriate pointers to them, the ringbuffer struct holds all the information you might need to submit a workload for processing, Execlists style. v2: Drop ring->ctx since that looks terribly ill-defined for legacy ringbuffer submission. Signed-off-by: Oscar Mateo (v1) Acked-by: Damien Lespiau (v2) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0c80bb1f5420..8f2d14da6228 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -134,6 +134,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, return ret; } + ringbuf->ring = ring; ringbuf->size = 32 * PAGE_SIZE; ringbuf->effective_size = ringbuf->size; ringbuf->head = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 064652034d7e..c35f956ed6a0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1594,6 +1594,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); ringbuf->size = 32 * PAGE_SIZE; + ringbuf->ring = ring; memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); init_waitqueue_head(&ring->irq_queue); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 669cc7527f9a..fe9d9d9d3598 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ 
-90,6 +90,8 @@ struct intel_ringbuffer { struct drm_i915_gem_object *obj; void __iomem *virtual_start; + struct intel_engine_cs *ring; + u32 head; u32 tail; int space; -- GitLab From 8670d6f97d8c19595950af1838f8458d7529825f Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:17 +0100 Subject: [PATCH 0115/1868] drm/i915/bdw: Populate LR contexts (somewhat) For the most part, logical ring context objects are similar to hardware contexts in that the backing object is meant to be opaque. There are some exceptions where we need to poke certain offsets of the object for initialization, updating the tail pointer or updating the PDPs. For our basic execlist implementation we'll only need our PPGTT PDs, and ringbuffer addresses in order to set up the context. With previous patches, we have both, so start prepping the context to be load. Before running a context for the first time you must populate some fields in the context object. These fields begin 1 PAGE + LRCA, ie. the first page (in 0 based counting) of the context image. These same fields will be read and written to as contexts are saved and restored once the system is up and running. Many of these fields are completely reused from previous global registers: ringbuffer head/tail/control, context control matches some previous MI_SET_CONTEXT flags, and page directories. There are other fields which we don't touch which we may want in the future. v2: CTX_LRI_HEADER_0 is MI_LOAD_REGISTER_IMM(14) for render and (11) for other engines. v3: Several rebases and general changes to the code. v4: Squash with "Extract LR context object populating" Also, Damien's review comments: - Set the Force Posted bit on the LRI header, as the BSpec suggest we do. - Prevent warning when compiling a 32-bits kernel without HIGHMEM64. - Add a clarifying comment to the context population code. v5: Damien's review comments: - The third MI_LOAD_REGISTER_IMM in the context does not set Force Posted. - Remove dead code. 
v6: Add a note about the (presumed) differences between BDW and CHV state contexts. Also, Brad's review comments: - Use the _MASKED_BIT_ENABLE, upper_32_bits and lower_32_bits macros. - Be less magical about how we set the ring size in the context. Signed-off-by: Ben Widawsky (v1) Signed-off-by: Rafael Barbalho (v2) Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 159 ++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7a6cc69cdc2b..c1d24242a02d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -282,6 +282,7 @@ * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! */ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) +#define MI_LRI_FORCE_POSTED (1<<12) #define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*(x)-1) #define MI_STORE_REGISTER_MEM_GEN8(x) MI_INSTR(0x24, 3*(x)-1) #define MI_SRM_LRM_GLOBAL_GTT (1<<22) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8f2d14da6228..a7a08a85edb3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -46,6 +46,38 @@ #define GEN8_LR_CONTEXT_ALIGN 4096 +#define RING_ELSP(ring) ((ring)->mmio_base+0x230) +#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) + +#define CTX_LRI_HEADER_0 0x01 +#define CTX_CONTEXT_CONTROL 0x02 +#define CTX_RING_HEAD 0x04 +#define CTX_RING_TAIL 0x06 +#define CTX_RING_BUFFER_START 0x08 +#define CTX_RING_BUFFER_CONTROL 0x0a +#define CTX_BB_HEAD_U 0x0c +#define CTX_BB_HEAD_L 0x0e +#define CTX_BB_STATE 0x10 +#define CTX_SECOND_BB_HEAD_U 0x12 +#define CTX_SECOND_BB_HEAD_L 0x14 +#define CTX_SECOND_BB_STATE 0x16 +#define CTX_BB_PER_CTX_PTR 0x18 +#define CTX_RCS_INDIRECT_CTX 0x1a +#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c +#define CTX_LRI_HEADER_1 0x21 
+#define CTX_CTX_TIMESTAMP 0x22 +#define CTX_PDP3_UDW 0x24 +#define CTX_PDP3_LDW 0x26 +#define CTX_PDP2_UDW 0x28 +#define CTX_PDP2_LDW 0x2a +#define CTX_PDP1_UDW 0x2c +#define CTX_PDP1_LDW 0x2e +#define CTX_PDP0_UDW 0x30 +#define CTX_PDP0_LDW 0x32 +#define CTX_LRI_HEADER_2 0x41 +#define CTX_R_PWR_CLK_STATE 0x42 +#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 + int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { WARN_ON(i915.enable_ppgtt == -1); @@ -59,6 +91,115 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +static int +populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj, + struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf) +{ + struct drm_i915_gem_object *ring_obj = ringbuf->obj; + struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx); + struct page *page; + uint32_t *reg_state; + int ret; + + ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); + if (ret) { + DRM_DEBUG_DRIVER("Could not set to CPU domain\n"); + return ret; + } + + ret = i915_gem_object_get_pages(ctx_obj); + if (ret) { + DRM_DEBUG_DRIVER("Could not get object pages\n"); + return ret; + } + + i915_gem_object_pin_pages(ctx_obj); + + /* The second page of the context object contains some fields which must + * be set up prior to the first execution. */ + page = i915_gem_object_get_page(ctx_obj, 1); + reg_state = kmap_atomic(page); + + /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM + * commands followed by (reg, value) pairs. The values we are setting here are + * only for the first context restore: on a subsequent save, the GPU will + * recreate this batchbuffer with new values (including all the missing + * MI_LOAD_REGISTER_IMM commands that we are not initializing here). 
*/ + if (ring->id == RCS) + reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14); + else + reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11); + reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED; + reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring); + reg_state[CTX_CONTEXT_CONTROL+1] = + _MASKED_BIT_ENABLE((1<<3) | MI_RESTORE_INHIBIT); + reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base); + reg_state[CTX_RING_HEAD+1] = 0; + reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base); + reg_state[CTX_RING_TAIL+1] = 0; + reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base); + reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj); + reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base); + reg_state[CTX_RING_BUFFER_CONTROL+1] = + ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID; + reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168; + reg_state[CTX_BB_HEAD_U+1] = 0; + reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140; + reg_state[CTX_BB_HEAD_L+1] = 0; + reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110; + reg_state[CTX_BB_STATE+1] = (1<<5); + reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c; + reg_state[CTX_SECOND_BB_HEAD_U+1] = 0; + reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114; + reg_state[CTX_SECOND_BB_HEAD_L+1] = 0; + reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118; + reg_state[CTX_SECOND_BB_STATE+1] = 0; + if (ring->id == RCS) { + /* TODO: according to BSpec, the register state context + * for CHV does not have these. OTOH, these registers do + * exist in CHV. 
I'm waiting for a clarification */ + reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0; + reg_state[CTX_BB_PER_CTX_PTR+1] = 0; + reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4; + reg_state[CTX_RCS_INDIRECT_CTX+1] = 0; + reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8; + reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0; + } + reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9); + reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED; + reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8; + reg_state[CTX_CTX_TIMESTAMP+1] = 0; + reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3); + reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3); + reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2); + reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2); + reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1); + reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1); + reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0); + reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0); + reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[3]); + reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[3]); + reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[2]); + reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[2]); + reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[1]); + reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[1]); + reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[0]); + reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[0]); + if (ring->id == RCS) { + reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); + reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8; + reg_state[CTX_R_PWR_CLK_STATE+1] = 0; + } + + kunmap_atomic(reg_state); + + ctx_obj->dirty = 1; + set_page_dirty(page); + i915_gem_object_unpin_pages(ctx_obj); + + return 0; +} + void intel_lr_context_free(struct intel_context *ctx) { int i; @@ -151,14 +292,24 @@ int intel_lr_context_deferred_create(struct 
intel_context *ctx, if (ret) { DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n", ring->name, ret); - kfree(ringbuf); - i915_gem_object_ggtt_unpin(ctx_obj); - drm_gem_object_unreference(&ctx_obj->base); - return ret; + goto error; + } + + ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf); + if (ret) { + DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); + intel_destroy_ringbuffer_obj(ringbuf); + goto error; } ctx->engine[ring->id].ringbuf = ringbuf; ctx->engine[ring->id].state = ctx_obj; return 0; + +error: + kfree(ringbuf); + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + return ret; } -- GitLab From ec3e9963a681789860e5c0120a745b717d942392 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:18 +0100 Subject: [PATCH 0116/1868] drm/i915/bdw: Deferred creation of user-created LRCs The backing objects and ringbuffers for contexts created via open fd are actually empty until the user starts sending execbuffers to them. At that point, we allocate & populate them. We do this because, at create time, we really don't know which engine is going to be used with the context later on (and we don't want to waste memory on objects that we might never use). v2: As contexts created via ioctl can only be used with the render ring, we have enough information to allocate & populate them right away. v3: Defer the creation always, even with ioctl-created contexts, as requested by Daniel Vetter. 
Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 7 +++---- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9f8fbbacf6c0..bcb41002aa13 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -782,9 +782,9 @@ int i915_switch_context(struct intel_engine_cs *ring, return do_switch(ring, to); } -static bool hw_context_enabled(struct drm_device *dev) +static bool contexts_enabled(struct drm_device *dev) { - return to_i915(dev)->hw_context_size; + return i915.enable_execlists || to_i915(dev)->hw_context_size; } int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, @@ -795,8 +795,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct intel_context *ctx; int ret; - /* FIXME: allow user-created LR contexts as well */ - if (!hw_context_enabled(dev)) + if (!contexts_enabled(dev)) return -ENODEV; ret = i915_mutex_lock_interruptible(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index dec2cc2fbd42..29cb2156d32b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -931,6 +931,14 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ERR_PTR(-EIO); } + if (i915.enable_execlists && !ctx->engine[ring->id].state) { + int ret = intel_lr_context_deferred_create(ctx, ring); + if (ret) { + DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret); + return ERR_PTR(ret); + } + } + return ctx; } -- GitLab From a83014d3f8b936778a9bc9b3d4137769bb26d9eb Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:21 +0100 Subject: [PATCH 0117/1868] drm/i915: Abstract the legacy workload submission mechanism away As 
suggested by Daniel Vetter. The idea, in subsequent patches, is to provide an alternative to these vfuncs for the Execlists submission mechanism. v2: Split into two and reordered to illustrate our intentions, instead of showing it off. Also, removed the add_request vfunc and added the stop_ring one. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: - Make checkpatch happy. - Be grumpy about the excessive vtable. - Ditch gt->is_ring_initialized.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 23 ++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem.c | 15 ++++++++++---- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 20 +++++++++---------- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eccb8e406e9c..9198f1c96470 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1645,6 +1645,20 @@ struct drm_i915_private { /* Old ums support infrastructure, same warning applies. */ struct i915_ums_state ums; + /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ + struct { + int (*do_execbuf)(struct drm_device *dev, struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags); + int (*init_rings)(struct drm_device *dev); + void (*cleanup_ring)(struct intel_engine_cs *ring); + void (*stop_ring)(struct intel_engine_cs *ring); + } gt; + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. 
@@ -2252,6 +2266,14 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_ringbuffer_submission(struct drm_device *dev, + struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer2(struct drm_device *dev, void *data, @@ -2404,6 +2426,7 @@ void i915_gem_reset(struct drm_device *dev); bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); +int i915_gem_init_rings(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice); void i915_gem_init_swizzling(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5646e9ba6383..33a54cbf9a2e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4495,7 +4495,7 @@ i915_gem_stop_ringbuffers(struct drm_device *dev) int i; for_each_ring(ring, dev_priv, i) - intel_stop_ring_buffer(ring); + dev_priv->gt.stop_ring(ring); } int @@ -4612,7 +4612,7 @@ intel_enable_blt(struct drm_device *dev) return true; } -static int i915_gem_init_rings(struct drm_device *dev) +int i915_gem_init_rings(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int ret; @@ -4695,7 +4695,7 @@ i915_gem_init_hw(struct drm_device *dev) i915_gem_init_swizzling(dev); - ret = i915_gem_init_rings(dev); + ret = dev_priv->gt.init_rings(dev); if (ret) return ret; @@ -4736,6 +4736,13 @@ int 
i915_gem_init(struct drm_device *dev) DRM_DEBUG_DRIVER("allow wake ack timed out\n"); } + if (!i915.enable_execlists) { + dev_priv->gt.do_execbuf = i915_gem_ringbuffer_submission; + dev_priv->gt.init_rings = i915_gem_init_rings; + dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; + dev_priv->gt.stop_ring = intel_stop_ring_buffer; + } + i915_gem_init_userptr(dev); i915_gem_init_global_gtt(dev); @@ -4771,7 +4778,7 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev) int i; for_each_ring(ring, dev_priv, i) - intel_cleanup_ring_buffer(ring); + dev_priv->gt.cleanup_ring(ring); } int diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 29cb2156d32b..26b38b3ae4f3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1023,14 +1023,14 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } -static int -legacy_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, - struct intel_engine_cs *ring, - struct intel_context *ctx, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas, - struct drm_i915_gem_object *batch_obj, - u64 exec_start, u32 flags) +int +i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags) { struct drm_clip_rect *cliprects = NULL; struct drm_i915_private *dev_priv = dev->dev_private; @@ -1402,8 +1402,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } else exec_start += i915_gem_obj_offset(batch_obj, vm); - ret = legacy_ringbuffer_submission(dev, file, ring, ctx, - args, &eb->vmas, batch_obj, exec_start, flags); + ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args, + &eb->vmas, batch_obj, exec_start, flags); /* * FIXME: We crucially rely upon the active tracking for the 
(ppgtt) -- GitLab From 454afebde873874b939465bfc1a294ac3697c96e Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:22 +0100 Subject: [PATCH 0118/1868] drm/i915/bdw: Skeleton for the new logical rings submission path Execlists are indeed a brave new world with respect to workload submission to the GPU. In previous versions of this series, I have tried to impact the legacy ringbuffer submission path as little as possible (mostly, passing the context around and using the correct ringbuffer when I needed one) but Daniel is afraid (probably with good reason) that these changes and, especially, future ones, will end up breaking older gens. This commit and some others coming next will try to limit the damage by creating an alternative path for workload submission. The first step is here: laying out a new ring init/fini. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 5 + drivers/gpu/drm/i915/intel_lrc.c | 151 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 12 +++ 3 files changed, 168 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 33a54cbf9a2e..9acb2469116a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4741,6 +4741,11 @@ int i915_gem_init(struct drm_device *dev) dev_priv->gt.init_rings = i915_gem_init_rings; dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; dev_priv->gt.stop_ring = intel_stop_ring_buffer; + } else { + dev_priv->gt.do_execbuf = intel_execlists_submission; + dev_priv->gt.init_rings = intel_logical_rings_init; + dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; + dev_priv->gt.stop_ring = intel_logical_ring_stop; } i915_gem_init_userptr(dev); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a7a08a85edb3..9c2ff8f11c90 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -91,6
+91,157 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags) +{ + /* TODO */ + return 0; +} + +void intel_logical_ring_stop(struct intel_engine_cs *ring) +{ + /* TODO */ +} + +void intel_logical_ring_cleanup(struct intel_engine_cs *ring) +{ + /* TODO */ +} + +static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring) +{ + /* TODO */ + return 0; +} + +static int logical_render_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[RCS]; + + ring->name = "render ring"; + ring->id = RCS; + ring->mmio_base = RENDER_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +static int logical_bsd_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[VCS]; + + ring->name = "bsd ring"; + ring->id = VCS; + ring->mmio_base = GEN6_BSD_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +static int logical_bsd2_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[VCS2]; + + ring->name = "bds2 ring"; + ring->id = VCS2; + ring->mmio_base = GEN8_BSD2_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +static int logical_blt_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = 
&dev_priv->ring[BCS]; + + ring->name = "blitter ring"; + ring->id = BCS; + ring->mmio_base = BLT_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +static int logical_vebox_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[VECS]; + + ring->name = "video enhancement ring"; + ring->id = VECS; + ring->mmio_base = VEBOX_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +int intel_logical_rings_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = logical_render_ring_init(dev); + if (ret) + return ret; + + if (HAS_BSD(dev)) { + ret = logical_bsd_ring_init(dev); + if (ret) + goto cleanup_render_ring; + } + + if (HAS_BLT(dev)) { + ret = logical_blt_ring_init(dev); + if (ret) + goto cleanup_bsd_ring; + } + + if (HAS_VEBOX(dev)) { + ret = logical_vebox_ring_init(dev); + if (ret) + goto cleanup_blt_ring; + } + + if (HAS_BSD2(dev)) { + ret = logical_bsd2_ring_init(dev); + if (ret) + goto cleanup_vebox_ring; + } + + ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); + if (ret) + goto cleanup_bsd2_ring; + + return 0; + +cleanup_bsd2_ring: + intel_logical_ring_cleanup(&dev_priv->ring[VCS2]); +cleanup_vebox_ring: + intel_logical_ring_cleanup(&dev_priv->ring[VECS]); +cleanup_blt_ring: + intel_logical_ring_cleanup(&dev_priv->ring[BCS]); +cleanup_bsd_ring: + intel_logical_ring_cleanup(&dev_priv->ring[VCS]); +cleanup_render_ring: + intel_logical_ring_cleanup(&dev_priv->ring[RCS]); + + return ret; +} + static int populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj, struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 3b93572431e3..bf0eff4e9f08 100644 
--- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -24,6 +24,11 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ +/* Logical Rings */ +void intel_logical_ring_stop(struct intel_engine_cs *ring); +void intel_logical_ring_cleanup(struct intel_engine_cs *ring); +int intel_logical_rings_init(struct drm_device *dev); + /* Logical Ring Contexts */ void intel_lr_context_free(struct intel_context *ctx); int intel_lr_context_deferred_create(struct intel_context *ctx, @@ -31,5 +36,12 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, /* Execlists */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); +int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags); #endif /* _INTEL_LRC_H_ */ -- GitLab From 48d823878d64f93163f5a949623346748bbce1b4 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:23 +0100 Subject: [PATCH 0119/1868] drm/i915/bdw: Generic logical ring init and cleanup Allocate and populate the default LRC for every ring, call gen-specific init/cleanup, init/fini the command parser and set the status page (now inside the LRC object). These are things all engines/rings have in common. Stopping the ring before cleanup and initializing the seqnos is left as a TODO task (we need more infrastructure in place before we can achieve this). v2: Check the ringbuffer backing obj for ring_is_initialized, instead of the context backing obj (similar, but not exactly the same). 
Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 4 -- drivers/gpu/drm/i915/intel_lrc.c | 54 ++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_ringbuffer.c | 17 ++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +-- 4 files changed, 70 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index bcb41002aa13..7a08f3e9e1ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -445,10 +445,6 @@ int i915_gem_context_init(struct drm_device *dev) /* NB: RCS will hold a ref for all rings */ ring->default_context = ctx; - - /* FIXME: we really only want to do this for initialized rings */ - if (i915.enable_execlists) - intel_lr_context_deferred_create(ctx, ring); } DRM_DEBUG_DRIVER("%s context support initialized\n", diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9c2ff8f11c90..ed7a4ff3bbd2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -110,12 +110,60 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { - /* TODO */ + if (!intel_ring_initialized(ring)) + return; + + /* TODO: make sure the ring is stopped */ + ring->preallocated_lazy_request = NULL; + ring->outstanding_lazy_seqno = 0; + + if (ring->cleanup) + ring->cleanup(ring); + + i915_cmd_parser_fini_ring(ring); + + if (ring->status_page.obj) { + kunmap(sg_page(ring->status_page.obj->pages->sgl)); + ring->status_page.obj = NULL; + } } static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring) { - /* TODO */ + int ret; + struct intel_context *dctx = ring->default_context; + struct drm_i915_gem_object *dctx_obj; + + /* Intentionally left blank. 
*/ + ring->buffer = NULL; + + ring->dev = dev; + INIT_LIST_HEAD(&ring->active_list); + INIT_LIST_HEAD(&ring->request_list); + init_waitqueue_head(&ring->irq_queue); + + ret = intel_lr_context_deferred_create(dctx, ring); + if (ret) + return ret; + + /* The status page is offset 0 from the context object in LRCs. */ + dctx_obj = dctx->engine[ring->id].state; + ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj); + ring->status_page.page_addr = kmap(sg_page(dctx_obj->pages->sgl)); + if (ring->status_page.page_addr == NULL) + return -ENOMEM; + ring->status_page.obj = dctx_obj; + + ret = i915_cmd_parser_init_ring(ring); + if (ret) + return ret; + + if (ring->init) { + ret = ring->init(ring); + if (ret) + return ret; + } + return 0; } @@ -399,6 +447,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, int ret; WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); + if (ctx->engine[ring->id].state) + return 0; context_size = round_up(get_lr_context_size(ring), 4096); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c35f956ed6a0..e4b97f5c5797 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -40,6 +40,23 @@ */ #define CACHELINE_BYTES 64 +bool +intel_ring_initialized(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + + if (!dev) + return false; + + if (i915.enable_execlists) { + struct intel_context *dctx = ring->default_context; + struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf; + + return ringbuf->obj; + } else + return ring->buffer && ring->buffer->obj; +} + static inline int __ring_space(int head, int tail, int size) { int space = head - (tail + I915_RING_FREE_SPACE); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index fe9d9d9d3598..fbe54ef6a9a1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -289,11 
+289,7 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; -static inline bool -intel_ring_initialized(struct intel_engine_cs *ring) -{ - return ring->buffer && ring->buffer->obj; -} +bool intel_ring_initialized(struct intel_engine_cs *ring); static inline unsigned intel_ring_flag(struct intel_engine_cs *ring) -- GitLab From 9b1136d505b1de5478e11b59ca59cf8ce2a33217 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:24 +0100 Subject: [PATCH 0120/1868] drm/i915/bdw: GEN-specific logical ring init Logical rings do not need most of the initialization their legacy ringbuffer counterparts do: we just need the pipe control object for the render ring, enable Execlists on the hardware and a few workarounds. v2: Squash with: "drm/i915: Extract pipe control fini & make init outside accesible". Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Make checkpatch happy.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 34 ++++++++++------ drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ 3 files changed, 78 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ed7a4ff3bbd2..1a1f5f98f05b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -108,6 +108,49 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) /* TODO */ } +static int gen8_init_common_ring(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + I915_WRITE(RING_MODE_GEN7(ring), + _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | + _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); + POSTING_READ(RING_MODE_GEN7(ring)); + DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name); + + memset(&ring->hangcheck, 0, sizeof(ring->hangcheck)); + + return 0; +} + +static int gen8_init_render_ring(struct intel_engine_cs *ring) 
+{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = gen8_init_common_ring(ring); + if (ret) + return ret; + + /* We need to disable the AsyncFlip performance optimisations in order + * to use MI_WAIT_FOR_EVENT within the CS. It should already be + * programmed to '1' on all products. + * + * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv + */ + I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); + + ret = intel_init_pipe_control(ring); + if (ret) + return ret; + + I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); + + return ret; +} + void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) @@ -178,6 +221,9 @@ static int logical_render_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + ring->init = gen8_init_render_ring; + ring->cleanup = intel_fini_pipe_control; + return logical_ring_init(dev, ring); } @@ -192,6 +238,8 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } @@ -206,6 +254,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } @@ -220,6 +270,8 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } @@ -234,6 +286,8 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c index e4b97f5c5797..dab5e7c79036 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -597,8 +597,25 @@ static int init_ring_common(struct intel_engine_cs *ring) return ret; } -static int -init_pipe_control(struct intel_engine_cs *ring) +void +intel_fini_pipe_control(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + + if (ring->scratch.obj == NULL) + return; + + if (INTEL_INFO(dev)->gen >= 5) { + kunmap(sg_page(ring->scratch.obj->pages->sgl)); + i915_gem_object_ggtt_unpin(ring->scratch.obj); + } + + drm_gem_object_unreference(&ring->scratch.obj->base); + ring->scratch.obj = NULL; +} + +int +intel_init_pipe_control(struct intel_engine_cs *ring) { int ret; @@ -673,7 +690,7 @@ static int init_render_ring(struct intel_engine_cs *ring) _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); if (INTEL_INFO(dev)->gen >= 5) { - ret = init_pipe_control(ring); + ret = intel_init_pipe_control(ring); if (ret) return ret; } @@ -708,16 +725,7 @@ static void render_ring_cleanup(struct intel_engine_cs *ring) dev_priv->semaphore_obj = NULL; } - if (ring->scratch.obj == NULL) - return; - - if (INTEL_INFO(dev)->gen >= 5) { - kunmap(sg_page(ring->scratch.obj->pages->sgl)); - i915_gem_object_ggtt_unpin(ring->scratch.obj); - } - - drm_gem_object_unreference(&ring->scratch.obj->base); - ring->scratch.obj = NULL; + intel_fini_pipe_control(ring); } static int gen8_rcs_signal(struct intel_engine_cs *signaller, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index fbe54ef6a9a1..677df0d7be48 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -381,6 +381,9 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno); int intel_ring_flush_all_caches(struct intel_engine_cs *ring); int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring); +void intel_fini_pipe_control(struct 
intel_engine_cs *ring); +int intel_init_pipe_control(struct intel_engine_cs *ring); + int intel_init_render_ring_buffer(struct drm_device *dev); int intel_init_bsd_ring_buffer(struct drm_device *dev); int intel_init_bsd2_ring_buffer(struct drm_device *dev); -- GitLab From f72a113a71ab08c4df8a5f80ab2f8a140feb81f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 7 Aug 2014 09:36:00 +0200 Subject: [PATCH 0121/1868] drm/radeon: add userptr support v8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds an IOCTL for turning a pointer supplied by userspace into a buffer object. It imposes several restrictions upon the memory being mapped: 1. It must be page aligned (both start/end addresses, i.e ptr and size). 2. It must be normal system memory, not a pointer into another map of IO space (e.g. it must not be a GTT mmapping of another object). 3. The BO is mapped into GTT, so the maximum amount of memory mapped at all times is still the GTT limit. 4. The BO is only mapped readonly for now, so no write support. 5. List of backing pages is only acquired once, so they represent a snapshot of the first use. Exporting and sharing as well as mapping of buffer objects created by this function is forbidden and results in an -EPERM. 
v2: squash all previous changes into first public version v3: fix tabs, map readonly, don't use MM callback any more v4: set TTM_PAGE_FLAG_SG so that TTM never messes with the pages, pin/unpin pages on bind/unbind instead of populate/unpopulate v5: rebased on 3.17-wip, IOCTL renamed to userptr, reject any unknown flags, better handle READONLY flag, improve permission check v6: fix ptr cast warning, use set_page_dirty/mark_page_accessed on unpin v7: add warning about its availability in the API definition v8: drop access_ok check, fix VM mapping bits Signed-off-by: Christian König Reviewed-by: Alex Deucher (v4) Reviewed-by: Jérôme Glisse (v4) Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 6 + drivers/gpu/drm/radeon/radeon_cs.c | 25 ++++- drivers/gpu/drm/radeon/radeon_drv.c | 5 +- drivers/gpu/drm/radeon/radeon_gem.c | 68 ++++++++++++ drivers/gpu/drm/radeon/radeon_kms.c | 1 + drivers/gpu/drm/radeon/radeon_object.c | 3 + drivers/gpu/drm/radeon/radeon_prime.c | 10 ++ drivers/gpu/drm/radeon/radeon_ttm.c | 145 +++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_vm.c | 3 + include/uapi/drm/radeon_drm.h | 16 +++ 10 files changed, 279 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 9e1732eb402c..6f38a23a5810 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2138,6 +2138,8 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); int radeon_gem_pin_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int radeon_gem_unpin_ioctl(struct drm_device *dev, void *data, @@ -2871,6 +2873,10 @@ extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enabl extern void radeon_atom_set_clock_gating(struct radeon_device
*rdev, int enable); extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain); extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo); +extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, + uint32_t flags); +extern bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm); +extern bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm); extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base); extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); extern int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index ee712c199b25..1321491cf499 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -78,7 +78,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) struct radeon_cs_chunk *chunk; struct radeon_cs_buckets buckets; unsigned i, j; - bool duplicate; + bool duplicate, need_mmap_lock = false; + int r; if (p->chunk_relocs_idx == -1) { return 0; @@ -164,6 +165,19 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs[i].allowed_domains = domain; } + if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) { + uint32_t domain = p->relocs[i].prefered_domains; + if (!(domain & RADEON_GEM_DOMAIN_GTT)) { + DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is " + "allowed for userptr BOs\n"); + return -EINVAL; + } + need_mmap_lock = true; + domain = RADEON_GEM_DOMAIN_GTT; + p->relocs[i].prefered_domains = domain; + p->relocs[i].allowed_domains = domain; + } + p->relocs[i].tv.bo = &p->relocs[i].robj->tbo; p->relocs[i].handle = r->handle; @@ -176,8 +190,15 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) if (p->cs_flags & RADEON_CS_USE_VM) p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm, &p->validated); + if (need_mmap_lock) + down_read(&current->mm->mmap_sem); + + r =
radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); - return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); + if (need_mmap_lock) + up_read(&current->mm->mmap_sem); + + return r; } static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index a773830c6c40..5b18af926527 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -114,6 +114,9 @@ int radeon_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv); void radeon_gem_object_close(struct drm_gem_object *obj, struct drm_file *file_priv); +struct dma_buf *radeon_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, + int flags); extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int flags, int *vpos, int *hpos, ktime_t *stime, @@ -568,7 +571,7 @@ static struct drm_driver kms_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, - .gem_prime_export = drm_gem_prime_export, + .gem_prime_export = radeon_gem_prime_export, .gem_prime_import = drm_gem_prime_import, .gem_prime_pin = radeon_gem_prime_pin, .gem_prime_unpin = radeon_gem_prime_unpin, diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index bfd7e1b0ff3f..993ab223b503 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -272,6 +272,65 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, return 0; } +int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct radeon_device *rdev = dev->dev_private; + struct drm_radeon_gem_userptr *args = data; + struct drm_gem_object *gobj; + struct radeon_bo *bo; + uint32_t handle; + int r; + + if (offset_in_page(args->addr | args->size)) + return -EINVAL; + + /* we only support read only
mappings for now */ + if (!(args->flags & RADEON_GEM_USERPTR_READONLY)) + return -EACCES; + + /* reject unknown flag values */ + if (args->flags & ~RADEON_GEM_USERPTR_READONLY) + return -EINVAL; + + /* readonly pages not tested on older hardware */ + if (rdev->family < CHIP_R600) + return -EINVAL; + + down_read(&rdev->exclusive_lock); + + /* create a gem object to contain this object in */ + r = radeon_gem_object_create(rdev, args->size, 0, + RADEON_GEM_DOMAIN_CPU, 0, + false, &gobj); + if (r) + goto handle_lockup; + + bo = gem_to_radeon_bo(gobj); + r = radeon_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags); + if (r) + goto release_object; + + r = drm_gem_handle_create(filp, gobj, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(gobj); + if (r) + goto handle_lockup; + + args->handle = handle; + up_read(&rdev->exclusive_lock); + return 0; + +release_object: + drm_gem_object_unreference_unlocked(gobj); + +handle_lockup: + up_read(&rdev->exclusive_lock); + r = radeon_gem_handle_lockup(rdev, r); + + return r; +} + int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -315,6 +374,10 @@ int radeon_mode_dumb_mmap(struct drm_file *filp, return -ENOENT; } robj = gem_to_radeon_bo(gobj); + if (radeon_ttm_tt_has_userptr(robj->tbo.ttm)) { + drm_gem_object_unreference_unlocked(gobj); + return -EPERM; + } *offset_p = radeon_bo_mmap_offset(robj); drm_gem_object_unreference_unlocked(gobj); return 0; @@ -532,6 +595,11 @@ int radeon_gem_op_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_radeon_bo(gobj); + + r = -EPERM; + if (radeon_ttm_tt_has_userptr(robj->tbo.ttm)) + goto out; + r = radeon_bo_reserve(robj, false); if (unlikely(r)) goto out; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index eb7164d07985..8309b11e674d 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ 
-885,5 +885,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_USERPTR, radeon_gem_userptr_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), }; int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 480c87d8edc5..c73c1e320585 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -264,6 +264,9 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset, { int r, i; + if (radeon_ttm_tt_has_userptr(bo->tbo.ttm)) + return -EPERM; + if (bo->pin_count) { bo->pin_count++; if (gpu_addr) diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index f7e48d329db3..bb18bc74b7d7 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -103,3 +103,13 @@ void radeon_gem_prime_unpin(struct drm_gem_object *obj) radeon_bo_unpin(bo); radeon_bo_unreserve(bo); } + +struct dma_buf *radeon_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, + int flags) +{ + struct radeon_bo *bo = gem_to_radeon_bo(gobj); + if (radeon_ttm_tt_has_userptr(bo->tbo.ttm)) + return ERR_PTR(-EPERM); + return drm_gem_prime_export(dev, gobj, flags); +} diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 72afe82a95c9..b20933fa35c6 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include #include "radeon_reg.h" #include "radeon.h" @@ -515,8 +517,92 @@ struct radeon_ttm_tt { struct ttm_dma_tt 
ttm; struct radeon_device *rdev; u64 offset; + + uint64_t userptr; + struct mm_struct *usermm; + uint32_t userflags; }; +/* prepare the sg table with the user pages */ +static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm) +{ + struct radeon_device *rdev = radeon_get_rdev(ttm->bdev); + struct radeon_ttm_tt *gtt = (void *)ttm; + unsigned pinned = 0, nents; + int r; + + int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY); + enum dma_data_direction direction = write ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + + if (current->mm != gtt->usermm) + return -EPERM; + + do { + unsigned num_pages = ttm->num_pages - pinned; + uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; + struct page **pages = ttm->pages + pinned; + + r = get_user_pages(current, current->mm, userptr, num_pages, + write, 0, pages, NULL); + if (r < 0) + goto release_pages; + + pinned += r; + + } while (pinned < ttm->num_pages); + + r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, + ttm->num_pages << PAGE_SHIFT, + GFP_KERNEL); + if (r) + goto release_sg; + + r = -ENOMEM; + nents = dma_map_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction); + if (nents != ttm->sg->nents) + goto release_sg; + + drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, + gtt->ttm.dma_address, ttm->num_pages); + + return 0; + +release_sg: + kfree(ttm->sg); + +release_pages: + release_pages(ttm->pages, pinned, 0); + return r; +} + +static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm) +{ + struct radeon_device *rdev = radeon_get_rdev(ttm->bdev); + struct radeon_ttm_tt *gtt = (void *)ttm; + struct scatterlist *sg; + int i; + + int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY); + enum dma_data_direction direction = write ? 
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + + /* free the sg table and pages again */ + dma_unmap_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction); + + for_each_sg(ttm->sg->sgl, sg, ttm->sg->nents, i) { + struct page *page = sg_page(sg); + + if (!(gtt->userflags & RADEON_GEM_USERPTR_READONLY)) + set_page_dirty(page); + + mark_page_accessed(page); + page_cache_release(page); + } + + sg_free_table(ttm->sg); +} + static int radeon_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { @@ -525,6 +611,11 @@ static int radeon_ttm_backend_bind(struct ttm_tt *ttm, RADEON_GART_PAGE_WRITE; int r; + if (gtt->userptr) { + radeon_ttm_tt_pin_userptr(ttm); + flags &= ~RADEON_GART_PAGE_WRITE; + } + gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT); if (!ttm->num_pages) { WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", @@ -547,6 +638,10 @@ static int radeon_ttm_backend_unbind(struct ttm_tt *ttm) struct radeon_ttm_tt *gtt = (void *)ttm; radeon_gart_unbind(gtt->rdev, gtt->offset, ttm->num_pages); + + if (gtt->userptr) + radeon_ttm_tt_unpin_userptr(ttm); + return 0; } @@ -603,6 +698,16 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm) if (ttm->state != tt_unpopulated) return 0; + if (gtt->userptr) { + ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL); + if (!ttm->sg) + return -ENOMEM; + + ttm->page_flags |= TTM_PAGE_FLAG_SG; + ttm->state = tt_unbound; + return 0; + } + if (slave && ttm->sg) { drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); @@ -652,6 +757,12 @@ static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm) unsigned i; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + if (gtt->userptr) { + kfree(ttm->sg); + ttm->page_flags &= ~TTM_PAGE_FLAG_SG; + return; + } + if (slave) return; @@ -680,6 +791,40 @@ static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm) ttm_pool_unpopulate(ttm); } +int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, + uint32_t flags) +{ + struct 
radeon_ttm_tt *gtt = (void *)ttm; + + if (gtt == NULL) + return -EINVAL; + + gtt->userptr = addr; + gtt->usermm = current->mm; + gtt->userflags = flags; + return 0; +} + +bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm) +{ + struct radeon_ttm_tt *gtt = (void *)ttm; + + if (gtt == NULL) + return false; + + return !!gtt->userptr; +} + +bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm) +{ + struct radeon_ttm_tt *gtt = (void *)ttm; + + if (gtt == NULL) + return false; + + return !!(gtt->userflags & RADEON_GEM_USERPTR_READONLY); +} + static struct ttm_bo_driver radeon_bo_driver = { .ttm_tt_create = &radeon_ttm_tt_create, .ttm_tt_populate = &radeon_ttm_tt_populate, diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index ccae4d9dc3de..0e107c5650bf 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -888,6 +888,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev, bo_va->flags &= ~RADEON_VM_PAGE_VALID; bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED; + if (bo_va->bo && radeon_ttm_tt_is_readonly(bo_va->bo->tbo.ttm)) + bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE; + if (mem) { addr = mem->start << PAGE_SHIFT; if (mem->mem_type != TTM_PL_SYSTEM) { diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 509b2d7a41b7..3a9f20930372 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -511,6 +511,7 @@ typedef struct { #define DRM_RADEON_GEM_BUSY 0x2a #define DRM_RADEON_GEM_VA 0x2b #define DRM_RADEON_GEM_OP 0x2c +#define DRM_RADEON_GEM_USERPTR 0x2d #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) @@ -554,6 +555,7 @@ typedef struct { #define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy) #define DRM_IOCTL_RADEON_GEM_VA 
DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va) #define DRM_IOCTL_RADEON_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op) +#define DRM_IOCTL_RADEON_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_USERPTR, struct drm_radeon_gem_userptr) typedef struct drm_radeon_init { enum { @@ -808,6 +810,20 @@ struct drm_radeon_gem_create { uint32_t flags; }; +/* + * This is not a reliable API and you should expect it to fail for any + * number of reasons and have fallback path that do not use userptr to + * perform any operation. + */ +#define RADEON_GEM_USERPTR_READONLY (1 << 0) + +struct drm_radeon_gem_userptr { + uint64_t addr; + uint64_t size; + uint32_t flags; + uint32_t handle; +}; + #define RADEON_TILING_MACRO 0x1 #define RADEON_TILING_MICRO 0x2 #define RADEON_TILING_SWAP_16BIT 0x4 -- GitLab From ddd00e33e17a62c5f44377ab42e7562ccfae7bd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 7 Aug 2014 09:36:01 +0200 Subject: [PATCH 0122/1868] drm/radeon: add userptr flag to limit it to anonymous memory v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid problems with writeback by limiting userptr to anonymous memory. 
v2: add commit and code comments Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 3 ++- drivers/gpu/drm/radeon/radeon_ttm.c | 10 ++++++++++ include/uapi/drm/radeon_drm.h | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 993ab223b503..032736b429bf 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -290,7 +290,8 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, return -EACCES; /* reject unknown flag values */ - if (args->flags & ~RADEON_GEM_USERPTR_READONLY) + if (args->flags & ~(RADEON_GEM_USERPTR_READONLY | + RADEON_GEM_USERPTR_ANONONLY)) return -EINVAL; /* readonly pages not tested on older hardware */ diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index b20933fa35c6..12e37b1ddc40 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -538,6 +538,16 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm) if (current->mm != gtt->usermm) return -EPERM; + if (gtt->userflags & RADEON_GEM_USERPTR_ANONONLY) { + /* check that we only pin down anonymous memory + to prevent problems with writeback */ + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; + struct vm_area_struct *vma; + vma = find_vma(gtt->usermm, gtt->userptr); + if (!vma || vma->vm_file || vma->vm_end < end) + return -EPERM; + } + do { unsigned num_pages = ttm->num_pages - pinned; uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 3a9f20930372..9720e1a36848 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -816,6 +816,7 @@ struct drm_radeon_gem_create { * perform any operation. 
*/ #define RADEON_GEM_USERPTR_READONLY (1 << 0) +#define RADEON_GEM_USERPTR_ANONONLY (1 << 1) struct drm_radeon_gem_userptr { uint64_t addr; -- GitLab From 2a84a4476d6e13de72472f6ca4338aed0a8269b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 7 Aug 2014 09:36:02 +0200 Subject: [PATCH 0123/1868] drm/radeon: add userptr flag to directly validate the BO to GTT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way we test userptr availability at BO creation time instead of first use. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 18 +++++++++++++++++- include/uapi/drm/radeon_drm.h | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 032736b429bf..450656027aba 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -291,7 +291,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, /* reject unknown flag values */ if (args->flags & ~(RADEON_GEM_USERPTR_READONLY | - RADEON_GEM_USERPTR_ANONONLY)) + RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE)) return -EINVAL; /* readonly pages not tested on older hardware */ @@ -312,6 +312,22 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto release_object; + if (args->flags & RADEON_GEM_USERPTR_VALIDATE) { + down_read(&current->mm->mmap_sem); + r = radeon_bo_reserve(bo, true); + if (r) { + up_read(&current->mm->mmap_sem); + goto release_object; + } + + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT); + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + radeon_bo_unreserve(bo); + up_read(&current->mm->mmap_sem); + if (r) + goto release_object; + } + r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(gobj); diff --git
a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 9720e1a36848..5dc61c2d4c73 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -817,6 +817,7 @@ struct drm_radeon_gem_create { */ #define RADEON_GEM_USERPTR_READONLY (1 << 0) #define RADEON_GEM_USERPTR_ANONONLY (1 << 1) +#define RADEON_GEM_USERPTR_VALIDATE (1 << 2) struct drm_radeon_gem_userptr { uint64_t addr; -- GitLab From 341cb9e426fac32523427c80c67543a16be46605 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 7 Aug 2014 09:36:03 +0200 Subject: [PATCH 0124/1868] drm/radeon: add userptr flag to register MMU notifier v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whenever userspace mapping related to our userptr change we wait for it to become idle and unmap it from GTT. v2: rebased, fix mutex unlock in error path v3: improve commit message Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/Kconfig | 1 + drivers/gpu/drm/radeon/Makefile | 2 +- drivers/gpu/drm/radeon/radeon.h | 12 ++ drivers/gpu/drm/radeon/radeon_device.c | 2 + drivers/gpu/drm/radeon/radeon_gem.c | 9 +- drivers/gpu/drm/radeon/radeon_mn.c | 272 +++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_object.c | 1 + include/uapi/drm/radeon_drm.h | 1 + 8 files changed, 298 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_mn.c diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index b066bb3ca01a..358b6e8697e9 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -115,6 +115,7 @@ config DRM_RADEON select HWMON select BACKLIGHT_CLASS_DEVICE select INTERVAL_TREE + select MMU_NOTIFIER help Choose this option if you have an ATI Radeon graphics card. There are both PCI and AGP versions. 
You don't need to choose this to diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 0013ad0db9ef..c7fa1aeb8c3f 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ - ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o + ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o # add async DMA block radeon-y += \ diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 6f38a23a5810..542da8208674 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -65,6 +65,7 @@ #include #include #include +#include #include #include @@ -487,6 +488,9 @@ struct radeon_bo { struct ttm_bo_kmap_obj dma_buf_vmap; pid_t pid; + + struct radeon_mn *mn; + struct interval_tree_node mn_it; }; #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base) @@ -1725,6 +1729,11 @@ void radeon_test_ring_sync(struct radeon_device *rdev, struct radeon_ring *cpB); void radeon_test_syncing(struct radeon_device *rdev); +/* + * MMU Notifier + */ +int radeon_mn_register(struct radeon_bo *bo, unsigned long addr); +void radeon_mn_unregister(struct radeon_bo *bo); /* * Debugfs @@ -2372,6 +2381,9 @@ struct radeon_device { /* tracking pinned memory */ u64 vram_pin_size; u64 gart_pin_size; + + struct mutex mn_lock; + DECLARE_HASHTABLE(mn_hash, 7); }; bool radeon_is_px(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index c8ea050c8fa4..c58f84f3c6a5 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1270,6 +1270,8 @@ int radeon_device_init(struct 
radeon_device *rdev, init_rwsem(&rdev->pm.mclk_lock); init_rwsem(&rdev->exclusive_lock); init_waitqueue_head(&rdev->irq.vblank_queue); + mutex_init(&rdev->mn_lock); + hash_init(rdev->mn_hash); r = radeon_gem_init(rdev); if (r) return r; diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 450656027aba..2a6fbf101cf0 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -291,7 +291,8 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, /* reject unknown flag values */ if (args->flags & ~(RADEON_GEM_USERPTR_READONLY | - RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE)) + RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE | + RADEON_GEM_USERPTR_REGISTER)) return -EINVAL; /* readonly pages not tested on older hardware */ @@ -312,6 +313,12 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto release_object; + if (args->flags & RADEON_GEM_USERPTR_REGISTER) { + r = radeon_mn_register(bo, args->addr); + if (r) + goto release_object; + } + if (args->flags & RADEON_GEM_USERPTR_VALIDATE) { down_read(&current->mm->mmap_sem); r = radeon_bo_reserve(bo, true); diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c new file mode 100644 index 000000000000..0157bc2f11f8 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -0,0 +1,272 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + */ +/* + * Authors: + * Christian König + */ + +#include +#include +#include +#include +#include + +#include "radeon.h" + +struct radeon_mn { + /* constant after initialisation */ + struct radeon_device *rdev; + struct mm_struct *mm; + struct mmu_notifier mn; + + /* only used on destruction */ + struct work_struct work; + + /* protected by rdev->mn_lock */ + struct hlist_node node; + + /* objects protected by lock */ + struct mutex lock; + struct rb_root objects; +}; + +/** + * radeon_mn_destroy - destroy the rmn + * + * @work: previously scheduled work item + * + * Lazy destroys the notifier from a work item + */ +static void radeon_mn_destroy(struct work_struct *work) +{ + struct radeon_mn *rmn = container_of(work, struct radeon_mn, work); + struct radeon_device *rdev = rmn->rdev; + struct radeon_bo *bo, *next; + + mutex_lock(&rdev->mn_lock); + mutex_lock(&rmn->lock); + hash_del(&rmn->node); + rbtree_postorder_for_each_entry_safe(bo, next, &rmn->objects, mn_it.rb) { + interval_tree_remove(&bo->mn_it, &rmn->objects); + bo->mn = NULL; + } + mutex_unlock(&rmn->lock); + mutex_unlock(&rdev->mn_lock); + mmu_notifier_unregister(&rmn->mn, rmn->mm); + kfree(rmn); +} + +/** + * radeon_mn_release - callback to notify about mm destruction + * + * @mn: our notifier + * @mm: the mm this callback is about + * + * Schedule a work item to lazy destroy our notifier. + */ +static void radeon_mn_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn); + INIT_WORK(&rmn->work, radeon_mn_destroy); + schedule_work(&rmn->work); +} + +/** + * radeon_mn_invalidate_range_start - callback to notify about mm change + * + * @mn: our notifier + * @mm: the mm this callback is about + * @start: start of updated range + * @end: end of updated range + * + * We block for all BOs between start and end to be idle and + * unmap them by moving them into system domain again.
+ */ +static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn); + struct interval_tree_node *it; + + /* notification is exclusive, but interval is inclusive */ + end -= 1; + + mutex_lock(&rmn->lock); + + it = interval_tree_iter_first(&rmn->objects, start, end); + while (it) { + struct radeon_bo *bo; + int r; + + bo = container_of(it, struct radeon_bo, mn_it); + it = interval_tree_iter_next(it, start, end); + + r = radeon_bo_reserve(bo, true); + if (r) { + DRM_ERROR("(%d) failed to reserve user bo\n", r); + continue; + } + + if (bo->tbo.sync_obj) { + r = radeon_fence_wait(bo->tbo.sync_obj, false); + if (r) + DRM_ERROR("(%d) failed to wait for user bo\n", r); + } + + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU); + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + if (r) + DRM_ERROR("(%d) failed to validate user bo\n", r); + + radeon_bo_unreserve(bo); + } + + mutex_unlock(&rmn->lock); +} + +static const struct mmu_notifier_ops radeon_mn_ops = { + .release = radeon_mn_release, + .invalidate_range_start = radeon_mn_invalidate_range_start, +}; + +/** + * radeon_mn_get - create notifier context + * + * @rdev: radeon device pointer + * + * Creates a notifier context for current->mm. 
+ */ +static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev) +{ + struct mm_struct *mm = current->mm; + struct radeon_mn *rmn; + int r; + + down_write(&mm->mmap_sem); + mutex_lock(&rdev->mn_lock); + + hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm) + if (rmn->mm == mm) + goto release_locks; + + rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); + if (!rmn) { + rmn = ERR_PTR(-ENOMEM); + goto release_locks; + } + + rmn->rdev = rdev; + rmn->mm = mm; + rmn->mn.ops = &radeon_mn_ops; + mutex_init(&rmn->lock); + rmn->objects = RB_ROOT; + + r = __mmu_notifier_register(&rmn->mn, mm); + if (r) + goto free_rmn; + + hash_add(rdev->mn_hash, &rmn->node, (unsigned long)mm); + +release_locks: + mutex_unlock(&rdev->mn_lock); + up_write(&mm->mmap_sem); + + return rmn; + +free_rmn: + mutex_unlock(&rdev->mn_lock); + up_write(&mm->mmap_sem); + kfree(rmn); + + return ERR_PTR(r); +} + +/** + * radeon_mn_register - register a BO for notifier updates + * + * @bo: radeon buffer object + * @addr: userptr addr we should monitor + * + * Registers an MMU notifier for the given BO at the specified address. + * Returns 0 on success, -ERRNO if anything goes wrong. + */ +int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) +{ + unsigned long end = addr + radeon_bo_size(bo) - 1; + struct radeon_device *rdev = bo->rdev; + struct radeon_mn *rmn; + struct interval_tree_node *it; + + rmn = radeon_mn_get(rdev); + if (IS_ERR(rmn)) + return PTR_ERR(rmn); + + mutex_lock(&rmn->lock); + + it = interval_tree_iter_first(&rmn->objects, addr, end); + if (it) { + mutex_unlock(&rmn->lock); + return -EEXIST; + } + + bo->mn = rmn; + bo->mn_it.start = addr; + bo->mn_it.last = end; + interval_tree_insert(&bo->mn_it, &rmn->objects); + + mutex_unlock(&rmn->lock); + + return 0; +} + +/** + * radeon_mn_unregister - unregister a BO for notifier updates + * + * @bo: radeon buffer object + * + * Remove any registration of MMU notifier updates from the buffer object. 
+ */ +void radeon_mn_unregister(struct radeon_bo *bo) +{ + struct radeon_device *rdev = bo->rdev; + struct radeon_mn *rmn; + + mutex_lock(&rdev->mn_lock); + rmn = bo->mn; + if (rmn == NULL) { + mutex_unlock(&rdev->mn_lock); + return; + } + + mutex_lock(&rmn->lock); + interval_tree_remove(&bo->mn_it, &rmn->objects); + bo->mn = NULL; + mutex_unlock(&rmn->lock); + mutex_unlock(&rdev->mn_lock); +} diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index c73c1e320585..287523807989 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -75,6 +75,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct radeon_bo, tbo); radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1); + radeon_mn_unregister(bo); mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 5dc61c2d4c73..c77495ffc44f 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -818,6 +818,7 @@ struct drm_radeon_gem_create { #define RADEON_GEM_USERPTR_READONLY (1 << 0) #define RADEON_GEM_USERPTR_ANONONLY (1 << 1) #define RADEON_GEM_USERPTR_VALIDATE (1 << 2) +#define RADEON_GEM_USERPTR_REGISTER (1 << 3) struct drm_radeon_gem_userptr { uint64_t addr; -- GitLab From bd645e4314b95b21146aa6ff893d783de20c4e60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 7 Aug 2014 09:36:04 +0200 Subject: [PATCH 0125/1868] drm/radeon: allow userptr write access under certain conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It needs to be anonymous memory (no file mappings) and we are required to install an MMU notifier.
Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 2a6fbf101cf0..01b58941acd4 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -285,19 +285,24 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, if (offset_in_page(args->addr | args->size)) return -EINVAL; - /* we only support read only mappings for now */ - if (!(args->flags & RADEON_GEM_USERPTR_READONLY)) - return -EACCES; - /* reject unknown flag values */ if (args->flags & ~(RADEON_GEM_USERPTR_READONLY | RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE | RADEON_GEM_USERPTR_REGISTER)) return -EINVAL; - /* readonly pages not tested on older hardware */ - if (rdev->family < CHIP_R600) - return -EINVAL; + if (args->flags & RADEON_GEM_USERPTR_READONLY) { + /* readonly pages not tested on older hardware */ + if (rdev->family < CHIP_R600) + return -EINVAL; + + } else if (!(args->flags & RADEON_GEM_USERPTR_ANONONLY) || + !(args->flags & RADEON_GEM_USERPTR_REGISTER)) { + + /* if we want to write to it we must require anonymous + memory and install a MMU notifier */ + return -EACCES; + } down_read(&rdev->exclusive_lock); -- GitLab From e94e37ad19c74b4c2569d556cda9da4a03d4e3f8 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:25 +0100 Subject: [PATCH 0126/1868] drm/i915/bdw: GEN-specific logical ring set/get seqno No mystery here: the seqno is still retrieved from the engine's HW status page (the one in the default context. For the moment, I see no reason to worry about other context's HWS page).
Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1a1f5f98f05b..c9518c6261de 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -151,6 +151,16 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) +{ + return intel_read_status_page(ring, I915_GEM_HWS_INDEX); +} + +static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno) +{ + intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); +} + void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) @@ -223,6 +233,8 @@ static int logical_render_ring_init(struct drm_device *dev) ring->init = gen8_init_render_ring; ring->cleanup = intel_fini_pipe_control; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } @@ -239,6 +251,8 @@ static int logical_bsd_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; ring->init = gen8_init_common_ring; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } @@ -255,6 +269,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; ring->init = gen8_init_common_ring; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } @@ -271,6 +287,8 @@ static int logical_blt_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; ring->init = gen8_init_common_ring; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } @@ -287,6 +305,8 @@ static int 
logical_vebox_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; ring->init = gen8_init_common_ring; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } -- GitLab From 26fbb77445bd402417f42936f68c0da26d33855d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 11 Aug 2014 18:37:37 +0300 Subject: [PATCH 0127/1868] drm/i915: Make hpd debug messages less cryptic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't print raw numbers, use port_name() and tell the user whether it's long or short without having to figure out what the other magic number means. Signed-off-by: Ville Syrjälä Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 4 +++- drivers/gpu/drm/i915/intel_dp.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f0d24db76e72..36eb1f234608 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1777,7 +1777,9 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev, long_hpd = (dig_hotplug_reg >> dig_shift) & PORTB_HOTPLUG_LONG_DETECT; } - DRM_DEBUG_DRIVER("digital hpd port %d %d\n", port, long_hpd); + DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", + port_name(port), + long_hpd ? "long" : "short"); /* for long HPD pulses we want to have the digital queue happen, but we still want HPD storm detection to function. 
*/ if (long_hpd) { diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 3b88255d87dc..def55cdfef25 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4041,7 +4041,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) if (intel_dig_port->base.type != INTEL_OUTPUT_EDP) intel_dig_port->base.type = INTEL_OUTPUT_DISPLAYPORT; - DRM_DEBUG_KMS("got hpd irq on port %d - %s\n", intel_dig_port->port, + DRM_DEBUG_KMS("got hpd irq on port %c - %s\n", + port_name(intel_dig_port->port), long_hpd ? "long" : "short"); if (long_hpd) { -- GitLab From 82e104cc266c6da30a30fc5028b2f0236c669cd7 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:26 +0100 Subject: [PATCH 0128/1868] drm/i915/bdw: New logical ring submission mechanism Well, new-ish: if all this code looks familiar, that's because it's a clone of the existing submission mechanism (with some modifications here and there to adapt it to LRCs and Execlists). And why did we do this instead of reusing code, one might wonder? Well, there are some fears that the differences are big enough that they will end up breaking all platforms. Also, Execlists offer several advantages, like control over when the GPU is done with a given workload, that can help simplify the submission mechanism, no doubt. I am interested in getting Execlists to work first and foremost, but in the future this parallel submission mechanism will help us to fine tune the mechanism without affecting old gens. v2: Pass the ringbuffer only (whenever possible). Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Appease checkpatch. Again. And drop the legacy sarea gunk that somehow crept in.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 189 ++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 13 ++ drivers/gpu/drm/i915/intel_ringbuffer.c | 22 +-- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 + 4 files changed, 217 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c9518c6261de..31025847d680 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -108,6 +108,195 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) /* TODO */ } +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) +{ + intel_logical_ring_advance(ringbuf); + + if (intel_ring_stopped(ringbuf->ring)) + return; + + /* TODO: how to submit a context to the ELSP is not here yet */ +} + +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring) +{ + if (ring->outstanding_lazy_seqno) + return 0; + + if (ring->preallocated_lazy_request == NULL) { + struct drm_i915_gem_request *request; + + request = kmalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; + + ring->preallocated_lazy_request = request; + } + + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); +} + +static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, + int bytes) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_i915_gem_request *request; + u32 seqno = 0; + int ret; + + if (ringbuf->last_retired_head != -1) { + ringbuf->head = ringbuf->last_retired_head; + ringbuf->last_retired_head = -1; + + ringbuf->space = intel_ring_space(ringbuf); + if (ringbuf->space >= bytes) + return 0; + } + + list_for_each_entry(request, &ring->request_list, list) { + if (__intel_ring_space(request->tail, ringbuf->tail, + ringbuf->size) >= bytes) { + seqno = request->seqno; + break; + } + } + + if (seqno == 0) + return -ENOSPC; + + ret = i915_wait_seqno(ring, seqno); + if (ret) + return ret; + + /* TODO: make sure we 
update the right ringbuffer's last_retired_head + * when retiring requests */ + i915_gem_retire_requests_ring(ring); + ringbuf->head = ringbuf->last_retired_head; + ringbuf->last_retired_head = -1; + + ringbuf->space = intel_ring_space(ringbuf); + return 0; +} + +static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, + int bytes) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long end; + int ret; + + ret = logical_ring_wait_request(ringbuf, bytes); + if (ret != -ENOSPC) + return ret; + + /* Force the context submission in case we have been skipping it */ + intel_logical_ring_advance_and_submit(ringbuf); + + /* With GEM the hangcheck timer should kick us out of the loop, + * leaving it early runs the risk of corrupting GEM state (due + * to running on almost untested codepaths). But on resume + * timers don't work yet, so prevent a complete hang in that + * case by choosing an insanely large timeout. 
*/ + end = jiffies + 60 * HZ; + + do { + ringbuf->head = I915_READ_HEAD(ring); + ringbuf->space = intel_ring_space(ringbuf); + if (ringbuf->space >= bytes) { + ret = 0; + break; + } + + msleep(1); + + if (dev_priv->mm.interruptible && signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + ret = i915_gem_check_wedge(&dev_priv->gpu_error, + dev_priv->mm.interruptible); + if (ret) + break; + + if (time_after(jiffies, end)) { + ret = -EBUSY; + break; + } + } while (1); + + return ret; +} + +static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf) +{ + uint32_t __iomem *virt; + int rem = ringbuf->size - ringbuf->tail; + + if (ringbuf->space < rem) { + int ret = logical_ring_wait_for_space(ringbuf, rem); + + if (ret) + return ret; + } + + virt = ringbuf->virtual_start + ringbuf->tail; + rem /= 4; + while (rem--) + iowrite32(MI_NOOP, virt++); + + ringbuf->tail = 0; + ringbuf->space = intel_ring_space(ringbuf); + + return 0; +} + +static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) +{ + int ret; + + if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) { + ret = logical_ring_wrap_buffer(ringbuf); + if (unlikely(ret)) + return ret; + } + + if (unlikely(ringbuf->space < bytes)) { + ret = logical_ring_wait_for_space(ringbuf, bytes); + if (unlikely(ret)) + return ret; + } + + return 0; +} + +int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = i915_gem_check_wedge(&dev_priv->gpu_error, + dev_priv->mm.interruptible); + if (ret) + return ret; + + ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t)); + if (ret) + return ret; + + /* Preallocate the olr before touching the ring */ + ret = logical_ring_alloc_seqno(ring); + if (ret) + return ret; + + ringbuf->space -= num_dwords * sizeof(uint32_t); + return 0; +} + static 
int gen8_init_common_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index bf0eff4e9f08..4e032875c1fd 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -29,6 +29,19 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring); void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int intel_logical_rings_init(struct drm_device *dev); +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); +static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) +{ + ringbuf->tail &= ringbuf->size - 1; +} +static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, + u32 data) +{ + iowrite32(data, ringbuf->virtual_start + ringbuf->tail); + ringbuf->tail += 4; +} +int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords); + /* Logical Ring Contexts */ void intel_lr_context_free(struct intel_context *ctx); int intel_lr_context_deferred_create(struct intel_context *ctx, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index dab5e7c79036..0bfa018fab20 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -57,7 +57,7 @@ intel_ring_initialized(struct intel_engine_cs *ring) return ring->buffer && ring->buffer->obj; } -static inline int __ring_space(int head, int tail, int size) +int __intel_ring_space(int head, int tail, int size) { int space = head - (tail + I915_RING_FREE_SPACE); if (space < 0) @@ -65,12 +65,13 @@ static inline int __ring_space(int head, int tail, int size) return space; } -static inline int ring_space(struct intel_ringbuffer *ringbuf) +int intel_ring_space(struct intel_ringbuffer *ringbuf) { - return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size); + return __intel_ring_space(ringbuf->head & HEAD_ADDR, + ringbuf->tail, 
ringbuf->size); } -static bool intel_ring_stopped(struct intel_engine_cs *ring) +bool intel_ring_stopped(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring->dev->dev_private; return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring); @@ -585,7 +586,7 @@ static int init_ring_common(struct intel_engine_cs *ring) else { ringbuf->head = I915_READ_HEAD(ring); ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); ringbuf->last_retired_head = -1; } @@ -1702,13 +1703,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n) ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); if (ringbuf->space >= n) return 0; } list_for_each_entry(request, &ring->request_list, list) { - if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) { + if (__intel_ring_space(request->tail, ringbuf->tail, + ringbuf->size) >= n) { seqno = request->seqno; break; } @@ -1725,7 +1727,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n) ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); return 0; } @@ -1754,7 +1756,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n) trace_i915_ring_wait_begin(ring); do { ringbuf->head = I915_READ_HEAD(ring); - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); if (ringbuf->space >= n) { ret = 0; break; @@ -1806,7 +1808,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring) iowrite32(MI_NOOP, virt++); ringbuf->tail = 0; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 
677df0d7be48..81bad364e36d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -374,6 +374,9 @@ static inline void intel_ring_advance(struct intel_engine_cs *ring) struct intel_ringbuffer *ringbuf = ring->buffer; ringbuf->tail &= ringbuf->size - 1; } +int __intel_ring_space(int head, int tail, int size); +int intel_ring_space(struct intel_ringbuffer *ringbuf); +bool intel_ring_stopped(struct intel_engine_cs *ring); void __intel_ring_advance(struct intel_engine_cs *ring); int __must_check intel_ring_idle(struct intel_engine_cs *ring); -- GitLab From 4da46e1e5bb7e7396fad172cdaffbe496562f3d8 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:27 +0100 Subject: [PATCH 0129/1868] drm/i915/bdw: GEN-specific logical ring emit request Very similar to the legacy add_request, only modified to account for logical ringbuffer. v2: Use MI_GLOBAL_GTT, as suggested by Brad Volkin. v3: Unify render and non-render in the same function, as noticed by Brad Volkin. 
Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 31 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ 3 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c1d24242a02d..3388afb90a93 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -272,6 +272,7 @@ #define MI_SEMAPHORE_POLL (1<<15) #define MI_SEMAPHORE_SAD_GTE_SDD (1<<12) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) +#define MI_STORE_DWORD_IMM_GEN8 MI_INSTR(0x20, 2) #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) #define MI_STORE_DWORD_INDEX_SHIFT 2 diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 31025847d680..94f8b4087642 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -350,6 +350,32 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno) intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); } +static int gen8_emit_request(struct intel_ringbuffer *ringbuf) +{ + struct intel_engine_cs *ring = ringbuf->ring; + u32 cmd; + int ret; + + ret = intel_logical_ring_begin(ringbuf, 6); + if (ret) + return ret; + + cmd = MI_STORE_DWORD_IMM_GEN8; + cmd |= MI_GLOBAL_GTT; + + intel_logical_ring_emit(ringbuf, cmd); + intel_logical_ring_emit(ringbuf, + (ring->status_page.gfx_addr + + (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT))); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno); + intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance_and_submit(ringbuf); + + return 0; +} + void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) @@ -424,6 +450,7 @@ static int 
logical_render_ring_init(struct drm_device *dev) ring->cleanup = intel_fini_pipe_control; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } @@ -442,6 +469,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } @@ -460,6 +488,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } @@ -478,6 +507,7 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } @@ -496,6 +526,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 81bad364e36d..467885159a80 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -216,6 +216,9 @@ struct intel_engine_cs { unsigned int num_dwords); } semaphore; + /* Execlists */ + int (*emit_request)(struct intel_ringbuffer *ringbuf); + /** * List of objects currently involved in rendering from the * ringbuffer. 
-- GitLab From 4712274c362b7730a1c6e01c9a51a6d46f5b7f43 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:28 +0100 Subject: [PATCH 0130/1868] drm/i915/bdw: GEN-specific logical ring emit flush Same as the legacy-style ring->flush. v2: The BSD invalidate bit still exists in GEN8! Add it for the VCS rings (but still consolidate the blt and bsd ring flushes into one). This was noticed by Brad Volkin. v3: The command for BSD and for other rings is slightly different: get it exactly the same as in gen6_ring_flush + gen6_bsd_ring_flush Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 85 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 7 -- drivers/gpu/drm/i915/intel_ringbuffer.h | 10 +++ 3 files changed, 95 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 94f8b4087642..a88fa6e9360b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -340,6 +340,86 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, + u32 invalidate_domains, + u32 unused) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t cmd; + int ret; + + ret = intel_logical_ring_begin(ringbuf, 4); + if (ret) + return ret; + + cmd = MI_FLUSH_DW + 1; + + if (ring == &dev_priv->ring[VCS]) { + if (invalidate_domains & I915_GEM_GPU_DOMAINS) + cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD | + MI_FLUSH_DW_STORE_INDEX | + MI_FLUSH_DW_OP_STOREDW; + } else { + if (invalidate_domains & I915_GEM_DOMAIN_RENDER) + cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX | + MI_FLUSH_DW_OP_STOREDW; + } + + intel_logical_ring_emit(ringbuf, cmd); + intel_logical_ring_emit(ringbuf, + I915_GEM_HWS_SCRATCH_ADDR | + 
MI_FLUSH_DW_USE_GTT); + intel_logical_ring_emit(ringbuf, 0); /* upper addr */ + intel_logical_ring_emit(ringbuf, 0); /* value */ + intel_logical_ring_advance(ringbuf); + + return 0; +} + +static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, + u32 invalidate_domains, + u32 flush_domains) +{ + struct intel_engine_cs *ring = ringbuf->ring; + u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 flags = 0; + int ret; + + flags |= PIPE_CONTROL_CS_STALL; + + if (flush_domains) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + } + + if (invalidate_domains) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + } + + ret = intel_logical_ring_begin(ringbuf, 6); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); + intel_logical_ring_emit(ringbuf, flags); + intel_logical_ring_emit(ringbuf, scratch_addr); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_advance(ringbuf); + + return 0; +} + static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) { return intel_read_status_page(ring, I915_GEM_HWS_INDEX); @@ -451,6 +531,7 @@ static int logical_render_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush_render; return logical_ring_init(dev, ring); } @@ -470,6 +551,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; 
+ ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -489,6 +571,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -508,6 +591,7 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -527,6 +611,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0bfa018fab20..4236014c1cda 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,13 +33,6 @@ #include "i915_trace.h" #include "intel_drv.h" -/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, - * but keeps the logic simple. Indeed, the whole purpose of this macro is just - * to give some inclination as to some of the magic values used in the various - * workarounds! - */ -#define CACHELINE_BYTES 64 - bool intel_ring_initialized(struct intel_engine_cs *ring) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 467885159a80..e497837c7724 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,13 @@ #define I915_CMD_HASH_ORDER 9 +/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, + * but keeps the logic simple. 
Indeed, the whole purpose of this macro is just + * to give some inclination as to some of the magic values used in the various + * workarounds! + */ +#define CACHELINE_BYTES 64 + /* * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use" * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use" @@ -218,6 +225,9 @@ struct intel_engine_cs { /* Execlists */ int (*emit_request)(struct intel_ringbuffer *ringbuf); + int (*emit_flush)(struct intel_ringbuffer *ringbuf, + u32 invalidate_domains, + u32 flush_domains); /** * List of objects currently involved in rendering from the -- GitLab From 9832b9dae8f9f505c7ed898a043b4f54b54597ed Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:30 +0100 Subject: [PATCH 0131/1868] drm/i915/bdw: Ring idle and stop with logical rings This is a hard one, since there is no direct hardware ring to control when in Execlists. We reuse intel_ring_idle here, but it should be fine as long as i915_add_request does the ring thing. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a88fa6e9360b..8a524baa8a6b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -105,7 +105,24 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, void intel_logical_ring_stop(struct intel_engine_cs *ring) { - /* TODO */ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + int ret; + + if (!intel_ring_initialized(ring)) + return; + + ret = intel_ring_idle(ring); + if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error)) + DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", + ring->name, ret); + + /* TODO: Is this correct with Execlists enabled? 
*/ + I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); + if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { + DRM_ERROR("%s :timed out trying to stop ring\n", ring->name); + return; + } + I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING)); } void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) @@ -458,10 +475,13 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { + struct drm_i915_private *dev_priv = ring->dev->dev_private; + if (!intel_ring_initialized(ring)) return; - /* TODO: make sure the ring is stopped */ + intel_logical_ring_stop(ring); + WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); ring->preallocated_lazy_request = NULL; ring->outstanding_lazy_seqno = 0; -- GitLab From 73d477f6bb17a1f14c4897a4b4a6597fe9a38ad2 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:31 +0100 Subject: [PATCH 0132/1868] drm/i915/bdw: Interrupts with logical rings We need to attend context switch interrupts from all rings. Also, fixed writing IMR/IER and added HWSTAM at ring init time. Notice that, if added to irq_enable_mask, the context switch interrupts would be incorrectly masked out when the user interrupts are due to no users waiting on a sequence number. Therefore, this commit adds a bitmask of interrupts to be kept unmasked at all times. v2: Disable HWSTAM, as suggested by Damien (nobody listens to these interrupts, anyway). v3: Add new get/put_irq functions. Signed-off-by: Thomas Daniel (v1) Signed-off-by: Oscar Mateo (v2 & v3) Reviewed-by: Damien Lespiau [danvet: Drop the GEN8_ prefix from the context switch interrupt define and move it to its brethren.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 19 ++++++-- drivers/gpu/drm/i915/i915_reg.h | 2 + drivers/gpu/drm/i915/intel_lrc.c | 58 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 4 files changed, 77 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 36eb1f234608..00957fa0b877 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1647,6 +1647,8 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, notify_ring(dev, &dev_priv->ring[RCS]); if (bcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[BCS]); + if ((rcs | bcs) & GT_CONTEXT_SWITCH_INTERRUPT) + DRM_DEBUG_DRIVER("TODO: Context switch\n"); } else DRM_ERROR("The master control interrupt lied (GT0)!\n"); } @@ -1659,9 +1661,13 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, vcs = tmp >> GEN8_VCS1_IRQ_SHIFT; if (vcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[VCS]); + if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) + DRM_DEBUG_DRIVER("TODO: Context switch\n"); vcs = tmp >> GEN8_VCS2_IRQ_SHIFT; if (vcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[VCS2]); + if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) + DRM_DEBUG_DRIVER("TODO: Context switch\n"); } else DRM_ERROR("The master control interrupt lied (GT1)!\n"); } @@ -1685,6 +1691,8 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, vcs = tmp >> GEN8_VECS_IRQ_SHIFT; if (vcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[VECS]); + if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) + DRM_DEBUG_DRIVER("TODO: Context switch\n"); } else DRM_ERROR("The master control interrupt lied (GT3)!\n"); } @@ -3788,12 +3796,17 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) /* These are interrupts we'll toggle with the ring mask register */ uint32_t gt_interrupts[] = { GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + 
GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | GT_RENDER_L3_PARITY_ERROR_INTERRUPT | - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT, + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT, GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT, + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT, 0, - GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT + GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT }; for (i = 0; i < ARRAY_SIZE(gt_interrupts); i++) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3388afb90a93..f79c20d49d99 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1087,6 +1087,7 @@ enum punit_power_well { #define RING_ACTHD_UDW(base) ((base)+0x5c) #define RING_NOPID(base) ((base)+0x94) #define RING_IMR(base) ((base)+0xa8) +#define RING_HWSTAM(base) ((base)+0x98) #define RING_TIMESTAMP(base) ((base)+0x358) #define TAIL_ADDR 0x001FFFF8 #define HEAD_WRAP_COUNT 0xFFE00000 @@ -1403,6 +1404,7 @@ enum punit_power_well { #define GT_BSD_CS_ERROR_INTERRUPT (1 << 15) #define GT_BSD_USER_INTERRUPT (1 << 12) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */ +#define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */ #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4) #define GT_RENDER_CS_MASTER_ERROR_INTERRUPT (1 << 3) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8a524baa8a6b..009a8b5c088e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -319,6 +319,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring) struct drm_device *dev = ring->dev; struct 
drm_i915_private *dev_priv = dev->dev_private; + I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask)); + I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff); + I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); @@ -357,6 +360,39 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long flags; + + if (!dev->irq_enabled) + return false; + + spin_lock_irqsave(&dev_priv->irq_lock, flags); + if (ring->irq_refcount++ == 0) { + I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask)); + POSTING_READ(RING_IMR(ring->mmio_base)); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + + return true; +} + +static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long flags; + + spin_lock_irqsave(&dev_priv->irq_lock, flags); + if (--ring->irq_refcount == 0) { + I915_WRITE_IMR(ring, ~ring->irq_keep_mask); + POSTING_READ(RING_IMR(ring->mmio_base)); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, flags); +} + static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, u32 invalidate_domains, u32 unused) @@ -545,6 +581,10 @@ static int logical_render_ring_init(struct drm_device *dev) ring->mmio_base = RENDER_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + if (HAS_L3_DPF(dev)) + ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; ring->init = gen8_init_render_ring; ring->cleanup = intel_fini_pipe_control; @@ -552,6 +592,8 @@ static int logical_render_ring_init(struct drm_device *dev) ring->set_seqno = gen8_set_seqno; ring->emit_request = 
gen8_emit_request; ring->emit_flush = gen8_emit_flush_render; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } @@ -566,12 +608,16 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->mmio_base = GEN6_BSD_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } @@ -586,12 +632,16 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->mmio_base = GEN8_BSD2_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } @@ -606,12 +656,16 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->mmio_base = BLT_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT; ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } @@ -626,12 +680,16 @@ static int logical_vebox_ring_init(struct drm_device *dev) 
ring->mmio_base = VEBOX_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT; ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index e497837c7724..cb529ee10c8f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -224,6 +224,7 @@ struct intel_engine_cs { } semaphore; /* Execlists */ + u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ int (*emit_request)(struct intel_ringbuffer *ringbuf); int (*emit_flush)(struct intel_ringbuffer *ringbuf, u32 invalidate_domains, -- GitLab From 156485852684b511be28a83c78fece8b27ef7c26 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:32 +0100 Subject: [PATCH 0133/1868] drm/i915/bdw: GEN-specific logical ring emit batchbuffer start Dispatch_execbuffer's evil twin. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Ditch the check for aliasing ppgtt. It'll break soon and execlists requires full ppgtt anyway.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 27 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 2 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 009a8b5c088e..e0d4ef2a5c30 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -360,6 +360,28 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, + u64 offset, unsigned flags) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + bool ppgtt = !(flags & I915_DISPATCH_SECURE); + int ret; + + ret = intel_logical_ring_begin(ringbuf, 4); + if (ret) + return ret; + + /* FIXME(BDW): Address space and security selectors. */ + intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); + intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance(ringbuf); + + return 0; +} + static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; @@ -594,6 +616,7 @@ static int logical_render_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush_render; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } @@ -618,6 +641,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } @@ -642,6 +666,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush; ring->irq_get = 
gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } @@ -666,6 +691,7 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } @@ -690,6 +716,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index cb529ee10c8f..24437da91f77 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -229,6 +229,8 @@ struct intel_engine_cs { int (*emit_flush)(struct intel_ringbuffer *ringbuf, u32 invalidate_domains, u32 flush_domains); + int (*emit_bb_start)(struct intel_ringbuffer *ringbuf, + u64 offset, unsigned flags); /** * List of objects currently involved in rendering from the -- GitLab From ba8b7ccb196b07c1c553450e8e7b44a7a938e58a Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:33 +0100 Subject: [PATCH 0134/1868] drm/i915/bdw: Workload submission mechanism for Execlists This is what i915_gem_do_execbuffer calls when it wants to execute some workload in an Execlists world. v2: Check arguments before doing stuff in intel_execlists_submission. Also, get rel_constants parsing right. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Drop the chipset flush, that's pre-gen6. And appease checkpatch a bit .... again!] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 6 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 130 ++++++++++++++++++++- 3 files changed, 135 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9198f1c96470..53cc4c083d0d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2266,6 +2266,12 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +void i915_gem_execbuffer_move_to_active(struct list_head *vmas, + struct intel_engine_cs *ring); +void i915_gem_execbuffer_retire_commands(struct drm_device *dev, + struct drm_file *file, + struct intel_engine_cs *ring, + struct drm_i915_gem_object *obj); int i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 26b38b3ae4f3..7e04a4825ed0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -942,7 +942,7 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } -static void +void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct intel_engine_cs *ring) { @@ -983,7 +983,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, } } -static void +void i915_gem_execbuffer_retire_commands(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e0d4ef2a5c30..e1a298f23f50 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -91,6 +91,55 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +static 
int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) +{ + struct intel_engine_cs *ring = ringbuf->ring; + uint32_t flush_domains; + int ret; + + flush_domains = 0; + if (ring->gpu_caches_dirty) + flush_domains = I915_GEM_GPU_DOMAINS; + + ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains); + if (ret) + return ret; + + ring->gpu_caches_dirty = false; + return 0; +} + +static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, + struct list_head *vmas) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct i915_vma *vma; + uint32_t flush_domains = 0; + bool flush_chipset = false; + int ret; + + list_for_each_entry(vma, vmas, exec_list) { + struct drm_i915_gem_object *obj = vma->obj; + + ret = i915_gem_object_sync(obj, ring); + if (ret) + return ret; + + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) + flush_chipset |= i915_gem_clflush_object(obj, false); + + flush_domains |= obj->base.write_domain; + } + + if (flush_domains & I915_GEM_DOMAIN_GTT) + wmb(); + + /* Unconditionally invalidate gpu caches and ensure that we do flush + * any residual writes from the previous batch. 
+ */ + return logical_ring_invalidate_all_caches(ringbuf); +} + int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, struct intel_context *ctx, @@ -99,7 +148,84 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_object *batch_obj, u64 exec_start, u32 flags) { - /* TODO */ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; + int instp_mode; + u32 instp_mask; + int ret; + + instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; + instp_mask = I915_EXEC_CONSTANTS_MASK; + switch (instp_mode) { + case I915_EXEC_CONSTANTS_REL_GENERAL: + case I915_EXEC_CONSTANTS_ABSOLUTE: + case I915_EXEC_CONSTANTS_REL_SURFACE: + if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) { + DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); + return -EINVAL; + } + + if (instp_mode != dev_priv->relative_constants_mode) { + if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { + DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); + return -EINVAL; + } + + /* The HW changed the meaning on this bit on gen6 */ + instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; + } + break; + default: + DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); + return -EINVAL; + } + + if (args->num_cliprects != 0) { + DRM_DEBUG("clip rectangles are only valid on pre-gen5\n"); + return -EINVAL; + } else { + if (args->DR4 == 0xffffffff) { + DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); + args->DR4 = 0; + } + + if (args->DR1 || args->DR4 || args->cliprects_ptr) { + DRM_DEBUG("0 cliprects but dirt in cliprects fields\n"); + return -EINVAL; + } + } + + if (args->flags & I915_EXEC_GEN7_SOL_RESET) { + DRM_DEBUG("sol reset is gen7 only\n"); + return -EINVAL; + } + + ret = execlists_move_to_gpu(ringbuf, vmas); + if (ret) + return ret; + + if (ring == &dev_priv->ring[RCS] && + instp_mode != dev_priv->relative_constants_mode) { + ret 
= intel_logical_ring_begin(ringbuf, 4); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); + intel_logical_ring_emit(ringbuf, INSTPM); + intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode); + intel_logical_ring_advance(ringbuf); + + dev_priv->relative_constants_mode = instp_mode; + } + + ret = ring->emit_bb_start(ringbuf, exec_start, flags); + if (ret) + return ret; + + i915_gem_execbuffer_move_to_active(vmas, ring); + i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); + return 0; } @@ -363,8 +489,6 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, u64 offset, unsigned flags) { - struct intel_engine_cs *ring = ringbuf->ring; - struct drm_i915_private *dev_priv = ring->dev->dev_private; bool ppgtt = !(flags & I915_DISPATCH_SECURE); int ret; -- GitLab From 14bf993e83e1d6924f4bf4506120a15c4b255e58 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:34 +0100 Subject: [PATCH 0135/1868] drm/i915/bdw: Always use MMIO flips with Execlists The normal flip function places things in the ring in the legacy way, so we either fix that or force MMIO flips always as we do in this patch. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch. Fucking again.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 245cf4128314..1bd1aa21a8e9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9536,6 +9536,8 @@ static bool use_mmio_flip(struct intel_engine_cs *ring, return false; else if (i915.use_mmio_flip > 0) return true; + else if (i915.enable_execlists) + return true; else return ring != obj->ring; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e1a298f23f50..6f1b64e01a05 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -85,7 +85,8 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists if (enable_execlists == 0) return 0; - if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev)) + if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) && + i915.use_mmio_flip >= 0) return 1; return 0; -- GitLab From b9d06dd9d1dd3672b391e6387d62aa8dc4e377bd Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Wed, 6 Aug 2014 15:04:44 +0200 Subject: [PATCH 0136/1868] drm/i915: vma/ppgtt lifetime rules VMAs should take a reference of the address space they use. Now, when the fd is closed, it will release the ref that the context was holding, but it will still be referenced by any vmas that are still active. ppgtt_release() should then only be called when the last thing referencing it releases the ref, and it can just call the base cleanup and free the ppgtt. Note that with this we will extend the lifetime of ppgtts which contain shared objects. But all the non-shared objects will get removed as soon as they drop of the active list and for the shared ones the shrinker can eventually reap them. 
Since we currently can't evict ppgtt pagetables either I don't think that temporary leak is important. Signed-off-by: Michel Thierry [danvet: Add note about potential ppgtt leak with this approach.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 8 ++++++++ drivers/gpu/drm/i915/i915_gem_context.c | 23 +++-------------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++++ 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 53cc4c083d0d..38b1849a450a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2547,7 +2547,9 @@ void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); /* i915_gem_context.c */ #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base) +#define vm_to_ppgtt(vm) container_of(vm, struct i915_hw_ppgtt, base) int __must_check i915_gem_context_init(struct drm_device *dev); +void ppgtt_release(struct kref *kref); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9acb2469116a..63aee412b258 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4476,12 +4476,20 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, void i915_gem_vma_destroy(struct i915_vma *vma) { + struct i915_address_space *vm = NULL; + struct i915_hw_ppgtt *ppgtt = NULL; WARN_ON(vma->node.allocated); /* Keep the vma as a placeholder in the execbuffer reservation lists */ if (!list_empty(&vma->exec_list)) return; + vm = vma->vm; + ppgtt = vm_to_ppgtt(vm); + + if (ppgtt) + kref_put(&ppgtt->ref, ppgtt_release); + list_del(&vma->vma_link); kfree(vma); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c index 7a08f3e9e1ae..a509a4bca991 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -108,30 +108,13 @@ static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) return; } - /* - * Make sure vmas are unbound before we take down the drm_mm - * - * FIXME: Proper refcounting should take care of this, this shouldn't be - * needed at all. - */ - if (!list_empty(&vm->active_list)) { - struct i915_vma *vma; - - list_for_each_entry(vma, &vm->active_list, mm_list) - if (WARN_ON(list_empty(&vma->vma_link) || - list_is_singular(&vma->vma_link))) - break; - - i915_gem_evict_vm(&ppgtt->base, true); - } else { - i915_gem_retire_requests(dev); - i915_gem_evict_vm(&ppgtt->base, false); - } + /* vmas should already be unbound */ + WARN_ON(!list_empty(&vm->active_list)); ppgtt->base.cleanup(&ppgtt->base); } -static void ppgtt_release(struct kref *kref) +void ppgtt_release(struct kref *kref) { struct i915_hw_ppgtt *ppgtt = container_of(kref, struct i915_hw_ppgtt, ref); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b4b7cfd226b7..76dd3b428483 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2148,10 +2148,15 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm) { struct i915_vma *vma; + struct i915_hw_ppgtt *ppgtt = NULL; vma = i915_gem_obj_to_vma(obj, vm); if (!vma) vma = __i915_gem_vma_create(obj, vm); + ppgtt = vm_to_ppgtt(vm); + if (ppgtt) + kref_get(&ppgtt->ref); + return vma; } -- GitLab From ee960be7bb09b201926cb37eaa82fb7da605ea7c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:45 +0200 Subject: [PATCH 0137/1868] drm/i915: Some cleanups for the ppgtt lifetime handling So when reviewing Michel's patch I've noticed a few things and cleaned them up: - The early checks in ppgtt_release are now redundant: The inactive list should 
always be empty now, so we can ditch these checks. Even for the aliasing ppgtt (though that's a different confusion) since we tear that down after all the objects are gone. - The ppgtt handling functions are splattered all over. Consolidate them in i915_gem_gtt.c, give them OCD prefixes and add wrappers for get/put. - There was a bit of confusion in ppgtt_release about whether it cares about the active or inactive list. It should care about them both, so augment the WARNINGs to check for both. There's still create_vm_for_ctx left to do, but that is blocked on the removal of ppgtt->ctx. Once that's done we can rename it to i915_ppgtt_create and move it to its siblings for handling ppgtts. v2: Move the ppgtt checks into the inline get/put functions as suggested by Chris. v3: Inline the now redundant ppgtt local variable. Cc: Michel Thierry Cc: Chris Wilson Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 5 +--- drivers/gpu/drm/i915/i915_gem_context.c | 36 +++---------------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 20 ++++++++++---- drivers/gpu/drm/i915/i915_gem_gtt.h | 14 +++++++++- 5 files changed, 33 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 38b1849a450a..101fc637eb46 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2549,7 +2549,6 @@ void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base) #define vm_to_ppgtt(vm) container_of(vm, struct i915_hw_ppgtt, base) int __must_check i915_gem_context_init(struct drm_device *dev); -void ppgtt_release(struct kref *kref); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_gem.c 
b/drivers/gpu/drm/i915/i915_gem.c index 63aee412b258..8061d45eaa80 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4477,7 +4477,6 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, void i915_gem_vma_destroy(struct i915_vma *vma) { struct i915_address_space *vm = NULL; - struct i915_hw_ppgtt *ppgtt = NULL; WARN_ON(vma->node.allocated); /* Keep the vma as a placeholder in the execbuffer reservation lists */ @@ -4485,10 +4484,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma) return; vm = vma->vm; - ppgtt = vm_to_ppgtt(vm); - if (ppgtt) - kref_put(&ppgtt->ref, ppgtt_release); + i915_ppgtt_put(vm_to_ppgtt(vm)); list_del(&vma->vma_link); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a509a4bca991..dc43d0263a01 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -96,33 +96,6 @@ #define GEN6_CONTEXT_ALIGN (64<<10) #define GEN7_CONTEXT_ALIGN 4096 -static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) -{ - struct drm_device *dev = ppgtt->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct i915_address_space *vm = &ppgtt->base; - - if (ppgtt == dev_priv->mm.aliasing_ppgtt || - (list_empty(&vm->active_list) && list_empty(&vm->inactive_list))) { - ppgtt->base.cleanup(&ppgtt->base); - return; - } - - /* vmas should already be unbound */ - WARN_ON(!list_empty(&vm->active_list)); - - ppgtt->base.cleanup(&ppgtt->base); -} - -void ppgtt_release(struct kref *kref) -{ - struct i915_hw_ppgtt *ppgtt = - container_of(kref, struct i915_hw_ppgtt, ref); - - do_ppgtt_cleanup(ppgtt); - kfree(ppgtt); -} - static size_t get_context_alignment(struct drm_device *dev) { if (IS_GEN6(dev)) @@ -174,8 +147,7 @@ void i915_gem_context_free(struct kref *ctx_ref) ppgtt = ctx_to_ppgtt(ctx); } - if (ppgtt) - kref_put(&ppgtt->ref, ppgtt_release); + i915_ppgtt_put(ppgtt); if (ctx->legacy_hw_ctx.rcs_state) 
drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base); list_del(&ctx->link); @@ -222,7 +194,7 @@ create_vm_for_ctx(struct drm_device *dev, struct intel_context *ctx) if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_gem_init_ppgtt(dev, ppgtt); + ret = i915_ppgtt_init(dev, ppgtt); if (ret) { kfree(ppgtt); return ERR_PTR(ret); @@ -234,7 +206,7 @@ create_vm_for_ctx(struct drm_device *dev, struct intel_context *ctx) static struct intel_context * __create_hw_context(struct drm_device *dev, - struct drm_i915_file_private *file_priv) + struct drm_i915_file_private *file_priv) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_context *ctx; @@ -342,7 +314,7 @@ i915_gem_create_context(struct drm_device *dev, /* For platforms which only have aliasing PPGTT, we fake the * address space and refcounting. */ ctx->vm = &dev_priv->mm.aliasing_ppgtt->base; - kref_get(&dev_priv->mm.aliasing_ppgtt->ref); + i915_ppgtt_get(dev_priv->mm.aliasing_ppgtt); } else ctx->vm = &dev_priv->gtt.base; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 76dd3b428483..6ffa12b72538 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1180,7 +1180,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return 0; } -int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) { struct drm_i915_private *dev_priv = dev->dev_private; int ret = 0; @@ -1211,6 +1211,19 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) return ret; } +void i915_ppgtt_release(struct kref *kref) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(kref, struct i915_hw_ppgtt, ref); + + /* vmas should already be unbound */ + WARN_ON(!list_empty(&ppgtt->base.active_list)); + WARN_ON(!list_empty(&ppgtt->base.inactive_list)); + + ppgtt->base.cleanup(&ppgtt->base); + kfree(ppgtt); +} + static void 
ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, @@ -2148,15 +2161,12 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm) { struct i915_vma *vma; - struct i915_hw_ppgtt *ppgtt = NULL; vma = i915_gem_obj_to_vma(obj, vm); if (!vma) vma = __i915_gem_vma_create(obj, vm); - ppgtt = vm_to_ppgtt(vm); - if (ppgtt) - kref_get(&ppgtt->ref); + i915_ppgtt_get(vm_to_ppgtt(vm)); return vma; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 666c938a51e3..c6beb528f955 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -272,7 +272,19 @@ void i915_gem_init_global_gtt(struct drm_device *dev); void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, unsigned long mappable_end, unsigned long end); -int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); + +int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); +void i915_ppgtt_release(struct kref *kref); +static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) +{ + if (ppgtt) + kref_get(&ppgtt->ref); +} +static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt) +{ + if (ppgtt) + kref_put(&ppgtt->ref, i915_ppgtt_release); +} void i915_check_and_clear_faults(struct drm_device *dev); void i915_gem_suspend_gtt_mappings(struct drm_device *dev); -- GitLab From 4d884705dababd7d0f3f12796bc7b45e84962596 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:47 +0200 Subject: [PATCH 0138/1868] drm/i915: Track file_priv, not ctx in the ppgtt structure Hardware contexts reference a ppgtt, not the other way round. And the only user of this (in debugfs) actually only cares about which file the ppgtt is associated with. So give it what it wants. While at it give the ppgtt create function a proper name&place. 
Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 22 +--------------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 6 +++++- 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3b7decbeeed3..1c3a9943a742 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -333,7 +333,7 @@ static int per_file_stats(int id, void *ptr, void *data) } ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base); - if (ppgtt->ctx && ppgtt->ctx->file_priv != stats->file_priv) + if (ppgtt->file_priv != stats->file_priv) continue; if (obj->ring) /* XXX per-vma statistic */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index dc43d0263a01..90665872734d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -184,26 +184,6 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) return obj; } -static struct i915_hw_ppgtt * -create_vm_for_ctx(struct drm_device *dev, struct intel_context *ctx) -{ - struct i915_hw_ppgtt *ppgtt; - int ret; - - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) - return ERR_PTR(-ENOMEM); - - ret = i915_ppgtt_init(dev, ppgtt); - if (ret) { - kfree(ppgtt); - return ERR_PTR(ret); - } - - ppgtt->ctx = ctx; - return ppgtt; -} - static struct intel_context * __create_hw_context(struct drm_device *dev, struct drm_i915_file_private *file_priv) @@ -290,7 +270,7 @@ i915_gem_create_context(struct drm_device *dev, } if (create_vm) { - struct i915_hw_ppgtt *ppgtt = create_vm_for_ctx(dev, ctx); + struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv); if (IS_ERR_OR_NULL(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6ffa12b72538..a5715faba65f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1211,6 +1211,27 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) return ret; } +struct i915_hw_ppgtt * +i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) +{ + struct i915_hw_ppgtt *ppgtt; + int ret; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return ERR_PTR(-ENOMEM); + + ret = i915_ppgtt_init(dev, ppgtt); + if (ret) { + kfree(ppgtt); + return ERR_PTR(ret); + } + + ppgtt->file_priv = fpriv; + + return ppgtt; +} + void i915_ppgtt_release(struct kref *kref) { struct i915_hw_ppgtt *ppgtt = diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index c6beb528f955..90ff45246b62 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -34,6 +34,8 @@ #ifndef __I915_GEM_GTT_H__ #define __I915_GEM_GTT_H__ +struct drm_i915_file_private; + typedef uint32_t gen6_gtt_pte_t; typedef uint64_t gen8_gtt_pte_t; typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; @@ -258,7 +260,7 @@ struct i915_hw_ppgtt { dma_addr_t *gen8_pt_dma_addr[4]; }; - struct intel_context *ctx; + struct drm_i915_file_private *file_priv; int (*enable)(struct i915_hw_ppgtt *ppgtt); int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, @@ -275,6 +277,8 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); void i915_ppgtt_release(struct kref *kref); +struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev, + struct drm_i915_file_private *fpriv); static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) { if (ppgtt) -- GitLab From 841cd7737557785c0f215b0984c06aaaaa882302 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:48 +0200 Subject: [PATCH 0139/1868] drm/i915: Only refcount ppgtt if it 
actually is one This essentially unbreaks non-ppgtt operation where we'd scribble over random memory. While at it give the vm_to_ppgtt function a proper prefix and make it a bit more paranoid. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 10 +++++++++- drivers/gpu/drm/i915/i915_gem.c | 3 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 ++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 101fc637eb46..454badf31dfd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2512,6 +2512,15 @@ static inline bool i915_is_ggtt(struct i915_address_space *vm) return vm == ggtt; } +static inline struct i915_hw_ppgtt * +i915_vm_to_ppgtt(struct i915_address_space *vm) +{ + WARN_ON(i915_is_ggtt(vm)); + + return container_of(vm, struct i915_hw_ppgtt, base); +} + + static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) { return i915_gem_obj_bound(obj, obj_to_ggtt(obj)); @@ -2547,7 +2556,6 @@ void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); /* i915_gem_context.c */ #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base) -#define vm_to_ppgtt(vm) container_of(vm, struct i915_hw_ppgtt, base) int __must_check i915_gem_context_init(struct drm_device *dev); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8061d45eaa80..e3e30cd474be 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4485,7 +4485,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma) vm = vma->vm; - i915_ppgtt_put(vm_to_ppgtt(vm)); + if (!i915_is_ggtt(vm)) + i915_ppgtt_put(i915_vm_to_ppgtt(vm)); list_del(&vma->vma_link); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 
a5715faba65f..48d8f4a21c3f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2187,7 +2187,8 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, if (!vma) vma = __i915_gem_vma_create(obj, vm); - i915_ppgtt_get(vm_to_ppgtt(vm)); + if (!i915_is_ggtt(vm)) + i915_ppgtt_get(i915_vm_to_ppgtt(vm)); return vma; } -- GitLab From 5dc383b05a05d05e964172d882603cd171040c5f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:49 +0200 Subject: [PATCH 0140/1868] drm/i915: Add proper prefix to obj_to_ggtt Stuff in headers really ought to have this. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 11 ++++++----- drivers/gpu/drm/i915/i915_gem.c | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 454badf31dfd..2db22732c7ae 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2503,7 +2503,7 @@ static inline bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) { } /* Some GGTT VM helpers */ -#define obj_to_ggtt(obj) \ +#define i915_obj_to_ggtt(obj) \ (&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base) static inline bool i915_is_ggtt(struct i915_address_space *vm) { @@ -2523,19 +2523,19 @@ i915_vm_to_ppgtt(struct i915_address_space *vm) static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) { - return i915_gem_obj_bound(obj, obj_to_ggtt(obj)); + return i915_gem_obj_bound(obj, i915_obj_to_ggtt(obj)); } static inline unsigned long i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *obj) { - return i915_gem_obj_offset(obj, obj_to_ggtt(obj)); + return i915_gem_obj_offset(obj, i915_obj_to_ggtt(obj)); } static inline unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj) { - return i915_gem_obj_size(obj, obj_to_ggtt(obj)); + return i915_gem_obj_size(obj, i915_obj_to_ggtt(obj)); } 
static inline int __must_check @@ -2543,7 +2543,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, uint32_t alignment, unsigned flags) { - return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment, flags | PIN_GLOBAL); + return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj), + alignment, flags | PIN_GLOBAL); } static inline int diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e3e30cd474be..c9d1396781e2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5252,7 +5252,7 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) struct i915_vma *vma; vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link); - if (vma->vm != obj_to_ggtt(obj)) + if (vma->vm != i915_obj_to_ggtt(obj)) return NULL; return vma; -- GitLab From 6c5566a82c6fb1da9e13a294f23d4cd85a08cb30 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:50 +0200 Subject: [PATCH 0141/1868] drm/i915: Allow i915_gem_setup_global_gtt to fail We already need this just as a safety check in case the preallocation reservation dance fails. But we definitely need this to be able to move the aliasing ppgtt setup back out of the context code to this place, where it belongs. 
Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 7 ++++++- drivers/gpu/drm/i915/i915_gem_gtt.c | 16 ++++++++++------ drivers/gpu/drm/i915/i915_gem_gtt.h | 4 ++-- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c9d1396781e2..c8404a439502 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4754,7 +4754,12 @@ int i915_gem_init(struct drm_device *dev) dev_priv->gt.stop_ring = intel_logical_ring_stop; } - i915_gem_init_userptr(dev); + ret = i915_gem_init_userptr(dev); + if (ret) { + mutex_unlock(&dev->struct_mutex); + return ret; + } + i915_gem_init_global_gtt(dev); ret = i915_gem_context_init(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 48d8f4a21c3f..d228f839ca4f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1698,10 +1698,10 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node, } } -void i915_gem_setup_global_gtt(struct drm_device *dev, - unsigned long start, - unsigned long mappable_end, - unsigned long end) +int i915_gem_setup_global_gtt(struct drm_device *dev, + unsigned long start, + unsigned long mappable_end, + unsigned long end) { /* Let GEM Manage all of the aperture. 
* @@ -1734,8 +1734,10 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, WARN_ON(i915_gem_obj_ggtt_bound(obj)); ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node); - if (ret) - DRM_DEBUG_KMS("Reservation failed\n"); + if (ret) { + DRM_DEBUG_KMS("Reservation failed: %i\n", ret); + return ret; + } obj->has_global_gtt_mapping = 1; } @@ -1752,6 +1754,8 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, /* And finally clear the reserved guard page */ ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true); + + return 0; } void i915_gem_init_global_gtt(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 90ff45246b62..0eb0dddff76b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -271,8 +271,8 @@ struct i915_hw_ppgtt { int i915_gem_gtt_init(struct drm_device *dev); void i915_gem_init_global_gtt(struct drm_device *dev); -void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, - unsigned long mappable_end, unsigned long end); +int i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, + unsigned long mappable_end, unsigned long end); int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); -- GitLab From 896ab1a5d54269b463a24194c2e4a369103b46d8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:51 +0200 Subject: [PATCH 0142/1868] drm/i915: Fix up checks for aliasing ppgtt A subsequent patch will no longer initialize the aliasing ppgtt if we have full ppgtt enabled, since we simply don't need that any more. Unfortunately a few places check for the aliasing ppgtt instead of checking for ppgtt in general. Fix them up. One special case are the gtt offset and size macros, which have some code to remap the aliasing ppgtt to the global gtt. The aliasing ppgtt is _not_ a logical address space, so passing that in as the vm is plain and simple a bug. 
So just WARN about it and carry on - we have a graceful fall-through anyway if we can't find the vma. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +--- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 8 ++------ drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +--- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index dea99d92fb4a..c45856bcc8b9 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -842,8 +842,6 @@ static u32 *vmap_batch(struct drm_i915_gem_object *obj) */ bool i915_needs_cmd_parser(struct intel_engine_cs *ring) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; - if (!ring->needs_cmd_parser) return false; @@ -852,7 +850,7 @@ bool i915_needs_cmd_parser(struct intel_engine_cs *ring) * disabled. That will cause all of the parser's PPGTT checks to * fail. For now, disable parsing when PPGTT is off. 
*/ - if (!dev_priv->mm.aliasing_ppgtt) + if (!USES_PPGTT(ring->dev)) return false; return (i915.enable_cmd_parser == 1); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 1763fbf34e1d..895f9f2f35ea 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -999,7 +999,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = HAS_WT(dev); break; case I915_PARAM_HAS_ALIASING_PPGTT: - value = dev_priv->mm.aliasing_ppgtt || USES_FULL_PPGTT(dev); + value = USES_PPGTT(dev); break; case I915_PARAM_HAS_WAIT_TIMEOUT: value = 1; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c8404a439502..6a7795097017 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5106,9 +5106,7 @@ unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o, struct drm_i915_private *dev_priv = o->base.dev->dev_private; struct i915_vma *vma; - if (!dev_priv->mm.aliasing_ppgtt || - vm == &dev_priv->mm.aliasing_ppgtt->base) - vm = &dev_priv->gtt.base; + WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); list_for_each_entry(vma, &o->vma_list, vma_link) { if (vma->vm == vm) @@ -5149,9 +5147,7 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, struct drm_i915_private *dev_priv = o->base.dev->dev_private; struct i915_vma *vma; - if (!dev_priv->mm.aliasing_ppgtt || - vm == &dev_priv->mm.aliasing_ppgtt->base) - vm = &dev_priv->gtt.base; + WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); BUG_ON(list_empty(&o->vma_list)); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4236014c1cda..13543f8528c2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2006,9 +2006,7 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, u64 offset, u32 len, unsigned flags) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; - bool ppgtt = 
dev_priv->mm.aliasing_ppgtt != NULL && - !(flags & I915_DISPATCH_SECURE); + bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE); int ret; ret = intel_ring_begin(ring, 4); -- GitLab From 82460d97246a993aa49e88bf9b4154cce60f8da8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 20:19:53 +0200 Subject: [PATCH 0143/1868] drm/i915: Rework ppgtt init to not require an aliasing ppgtt Currently we abuse the aliasing ppgtt to set up the ppgtt support in general. Which is a bit backwards since with full ppgtt we don't ever need the aliasing ppgtt. So untangle this and separate the ppgtt init from the aliasing ppgtt. While at it drag it out of the context enabling (which just does a switch to the default context). Note that we still have the differentiation between synchronous and asynchronous ppgtt setup, but that will soon vanish. So also correctly wire up the return value handling to be prepared for when ->switch_mm drops the synchronous parameter and could start to fail. 
Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 8 +++ drivers/gpu/drm/i915/i915_gem_context.c | 7 --- drivers/gpu/drm/i915/i915_gem_gtt.c | 84 ++++++++++--------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + 4 files changed, 42 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6a7795097017..6c2f0b886eb0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4719,6 +4719,14 @@ i915_gem_init_hw(struct drm_device *dev) if (ret && ret != -EIO) { DRM_ERROR("Context enable failed %d\n", ret); i915_gem_cleanup_ringbuffer(dev); + + return ret; + } + + ret = i915_ppgtt_init_hw(dev); + if (ret && ret != -EIO) { + DRM_ERROR("PPGTT enable failed %d\n", ret); + i915_gem_cleanup_ringbuffer(dev); } return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 90665872734d..eece059a8447 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -436,13 +436,6 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv) struct intel_engine_cs *ring; int ret, i; - /* This is the only place the aliasing PPGTT gets enabled, which means - * it has to happen before we bail on reset */ - if (dev_priv->mm.aliasing_ppgtt) { - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - ppgtt->enable(ppgtt); - } - /* FIXME: We should make this work, even in reset */ if (i915_reset_in_progress(&dev_priv->gpu_error)) return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d228f839ca4f..b7dcf72126f9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -67,7 +67,6 @@ static void ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); static void ppgtt_unbind_vma(struct i915_vma *vma); -static int gen8_ppgtt_enable(struct i915_hw_ppgtt 
*ppgtt); static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level, @@ -604,7 +603,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) kunmap_atomic(pd_vaddr); } - ppgtt->enable = gen8_ppgtt_enable; ppgtt->switch_mm = gen8_mm_switch; ppgtt->base.clear_range = gen8_ppgtt_clear_range; ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; @@ -825,39 +823,20 @@ static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, return 0; } -static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +static void gen8_ppgtt_enable(struct drm_device *dev) { - struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; - int j, ret; + int j; for_each_ring(ring, dev_priv, j) { I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - - /* We promise to do a switch later with FULL PPGTT. If this is - * aliasing, this is the one and only switch we'll do */ - if (USES_FULL_PPGTT(dev)) - continue; - - ret = ppgtt->switch_mm(ppgtt, ring, true); - if (ret) - goto err_out; } - - return 0; - -err_out: - for_each_ring(ring, dev_priv, j) - I915_WRITE(RING_MODE_GEN7(ring), - _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE)); - return ret; } -static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +static void gen7_ppgtt_enable(struct drm_device *dev) { - struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; uint32_t ecochk, ecobits; @@ -876,31 +855,16 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) I915_WRITE(GAM_ECOCHK, ecochk); for_each_ring(ring, dev_priv, i) { - int ret; /* GFX_MODE is per-ring on gen7+ */ I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - - /* We promise to do a switch later with FULL PPGTT. 
If this is - * aliasing, this is the one and only switch we'll do */ - if (USES_FULL_PPGTT(dev)) - continue; - - ret = ppgtt->switch_mm(ppgtt, ring, true); - if (ret) - return ret; } - - return 0; } -static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +static void gen6_ppgtt_enable(struct drm_device *dev) { - struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring; uint32_t ecochk, gab_ctl, ecobits; - int i; ecobits = I915_READ(GAC_ECO_BITS); I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | @@ -913,14 +877,6 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - - for_each_ring(ring, dev_priv, i) { - int ret = ppgtt->switch_mm(ppgtt, ring, true); - if (ret) - return ret; - } - - return 0; } /* PPGTT support for Sandybdrige/Gen6 and later */ @@ -1140,13 +1096,10 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; if (IS_GEN6(dev)) { - ppgtt->enable = gen6_ppgtt_enable; ppgtt->switch_mm = gen6_mm_switch; } else if (IS_HASWELL(dev)) { - ppgtt->enable = gen7_ppgtt_enable; ppgtt->switch_mm = hsw_mm_switch; } else if (IS_GEN7(dev)) { - ppgtt->enable = gen7_ppgtt_enable; ppgtt->switch_mm = gen7_mm_switch; } else BUG(); @@ -1211,6 +1164,35 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) return ret; } +int i915_ppgtt_init_hw(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + int i, ret = 0; + + if (!USES_PPGTT(dev)) + return 0; + + if (IS_GEN6(dev)) + gen6_ppgtt_enable(dev); + else if (IS_GEN7(dev)) + gen7_ppgtt_enable(dev); + else if (INTEL_INFO(dev)->gen >= 8) + gen8_ppgtt_enable(dev); + else + WARN_ON(1); + + if (ppgtt) { + 
for_each_ring(ring, dev_priv, i) { + ret = ppgtt->switch_mm(ppgtt, ring, true); + if (ret != 0) + return ret; + } + } + + return ret; +} struct i915_hw_ppgtt * i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 0eb0dddff76b..98fbb7309ea5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -276,6 +276,7 @@ int i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); +int i915_ppgtt_init_hw(struct drm_device *dev); void i915_ppgtt_release(struct kref *kref); struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv); -- GitLab From fa76da3499f1789f0e37d3bbcdc320bdf47c89ca Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 20:19:54 +0200 Subject: [PATCH 0144/1868] drm/i915: Initialize the aliasing ppgtt as part of global gtt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stuffing this into the context setup code doesn't make a lot of sense. Also reusing the real ppgtt setup code makes even less sense since the aliasing ppgtt isn't a real address space. Leaving all that stuff uninitialized will make sure that we catch any abusers promptly. This is also prep work to clean up the context->ppgtt link. v2: Fix up the logic fail, I've fumbled it so badly to completely disable ppgtt on gen6. Spotted by Ville and Michel. Also move around the pde write into the gen6 init function, since otherwise it won't work at all. v3: Only initialize the aliasing ppgtt when we actually enable it. Cc: "Thierry, Michel" Cc: Ville Syrjälä Reviewed-by: Michel Thierry [danvet: Squash in fixup from Fengguang Wu.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 13 +------- drivers/gpu/drm/i915/i915_gem_gtt.c | 42 ++++++++++++++++++------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index eece059a8447..7093f5df9ee7 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -279,17 +279,6 @@ i915_gem_create_context(struct drm_device *dev, goto err_unpin; } else ctx->vm = &ppgtt->base; - - /* This case is reserved for the global default context and - * should only happen once. */ - if (is_global_default_ctx) { - if (WARN_ON(dev_priv->mm.aliasing_ppgtt)) { - ret = -EEXIST; - goto err_unpin; - } - - dev_priv->mm.aliasing_ppgtt = ppgtt; - } } else if (USES_PPGTT(dev)) { /* For platforms which only have aliasing PPGTT, we fake the * address space and refcounting. */ @@ -368,7 +357,7 @@ int i915_gem_context_init(struct drm_device *dev) } } - ctx = i915_gem_create_context(dev, NULL, USES_PPGTT(dev)); + ctx = i915_gem_create_context(dev, NULL, USES_FULL_PPGTT(dev)); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context (error %ld)\n", PTR_ERR(ctx)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b7dcf72126f9..b033ff770ac4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1130,35 +1130,38 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); + gen6_write_pdes(ppgtt); + DRM_DEBUG("Adding PPGTT at offset %x\n", + ppgtt->pd_offset << 10); + return 0; } -int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) { struct drm_i915_private *dev_priv = dev->dev_private; - int ret = 0; ppgtt->base.dev = dev; ppgtt->base.scratch = dev_priv->gtt.base.scratch; if 
(INTEL_INFO(dev)->gen < 8) - ret = gen6_ppgtt_init(ppgtt); + return gen6_ppgtt_init(ppgtt); else if (IS_GEN8(dev)) - ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total); + return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total); else BUG(); +} +int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int ret = 0; - if (!ret) { - struct drm_i915_private *dev_priv = dev->dev_private; + ret = __hw_ppgtt_init(dev, ppgtt); + if (ret == 0) { kref_init(&ppgtt->ref); drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total); i915_init_vm(dev_priv, &ppgtt->base); - if (INTEL_INFO(dev)->gen < 8) { - gen6_write_pdes(ppgtt); - DRM_DEBUG("Adding PPGTT at offset %x\n", - ppgtt->pd_offset << 10); - } } return ret; @@ -1699,6 +1702,7 @@ int i915_gem_setup_global_gtt(struct drm_device *dev, struct drm_mm_node *entry; struct drm_i915_gem_object *obj; unsigned long hole_start, hole_end; + int ret; BUG_ON(mappable_end > end); @@ -1710,7 +1714,7 @@ int i915_gem_setup_global_gtt(struct drm_device *dev, /* Mark any preallocated objects as occupied */ list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); - int ret; + DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n", i915_gem_obj_ggtt_offset(obj), obj->base.size); @@ -1737,6 +1741,20 @@ int i915_gem_setup_global_gtt(struct drm_device *dev, /* And finally clear the reserved guard page */ ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true); + if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { + struct i915_hw_ppgtt *ppgtt; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return -ENOMEM; + + ret = __hw_ppgtt_init(dev, ppgtt); + if (ret != 0) + return ret; + + dev_priv->mm.aliasing_ppgtt = ppgtt; + } + return 0; } -- GitLab From ae6c4806927b8b0781ecc187aa16b10c820fc430 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:53 +0200 
Subject: [PATCH 0145/1868] drm/i915: Only track real ppgtt for a context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a bit of confusion since we track the global gtt, the aliasing and real ppgtt in the ctx->vm pointer. And not all callers really bother to check for the different cases and just presume that it points to a real ppgtt. Now looking closely we don't actually need ->vm to always point at an address space - the only place that cares actually has fixup code already to decide whether to look at the per-process or the global address space. So switch to just tracking the ppgtt directly and ditch all the extraneous code. v2: Fixup the ppgtt debugfs file to not oops on a NULL ctx->ppgtt. Also drop the early exit - without aliasing ppgtt we want to dump all the ppgtts of the contexts if we have full ppgtt. v3: Actually git add the compile fix. Reviewed-by: Michel Thierry Cc: "Thierry, Michel" Cc: Ville Syrjälä OTC-Jira: VIZ-3724 [danvet: Resolve conflicts with execlist patches while applying.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 11 ++++++-- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem_context.c | 33 +++++++--------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 10 +++++-- drivers/gpu/drm/i915/intel_lrc.c | 2 +- 6 files changed, 30 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1c3a9943a742..d42db6bc34e0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1821,7 +1821,13 @@ static int per_file_ctx(int id, void *ptr, void *data) { struct intel_context *ctx = ptr; struct seq_file *m = data; - struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx); + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; + + if (!ppgtt) { + seq_printf(m, " no ppgtt for context %d\n", + ctx->user_handle); + return 0; + } if (i915_gem_context_is_default(ctx)) seq_puts(m, " default context:\n"); @@ -1881,8 +1887,7 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev) seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset); ppgtt->debug_dump(ppgtt, m); - } else - return; + } list_for_each_entry_reverse(file, &dev->filelist, lhead) { struct drm_i915_file_private *file_priv = file->driver_priv; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2db22732c7ae..eb99a109c0bc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -624,7 +624,7 @@ struct intel_context { uint8_t remap_slice; struct drm_i915_file_private *file_priv; struct i915_ctx_hang_stats hang_stats; - struct i915_address_space *vm; + struct i915_hw_ppgtt *ppgtt; /* Legacy ring buffer submission */ struct { @@ -2556,7 +2556,6 @@ i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj) void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); /* i915_gem_context.c */ -#define ctx_to_ppgtt(ctx) 
container_of((ctx)->vm, struct i915_hw_ppgtt, base) int __must_check i915_gem_context_init(struct drm_device *dev); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 7093f5df9ee7..206bf2d6c554 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -135,19 +135,13 @@ static int get_context_size(struct drm_device *dev) void i915_gem_context_free(struct kref *ctx_ref) { struct intel_context *ctx = container_of(ctx_ref, - typeof(*ctx), ref); - struct i915_hw_ppgtt *ppgtt = NULL; + typeof(*ctx), ref); - if (i915.enable_execlists) { - ppgtt = ctx_to_ppgtt(ctx); + if (i915.enable_execlists) intel_lr_context_free(ctx); - } else if (ctx->legacy_hw_ctx.rcs_state) { - /* We refcount even the aliasing PPGTT to keep the code symmetric */ - if (USES_PPGTT(ctx->legacy_hw_ctx.rcs_state->base.dev)) - ppgtt = ctx_to_ppgtt(ctx); - } - i915_ppgtt_put(ppgtt); + i915_ppgtt_put(ctx->ppgtt); + if (ctx->legacy_hw_ctx.rcs_state) drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base); list_del(&ctx->link); @@ -243,7 +237,6 @@ i915_gem_create_context(struct drm_device *dev, bool create_vm) { const bool is_global_default_ctx = file_priv == NULL; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_context *ctx; int ret = 0; @@ -277,15 +270,10 @@ i915_gem_create_context(struct drm_device *dev, PTR_ERR(ppgtt)); ret = PTR_ERR(ppgtt); goto err_unpin; - } else - ctx->vm = &ppgtt->base; - } else if (USES_PPGTT(dev)) { - /* For platforms which only have aliasing PPGTT, we fake the - * address space and refcounting. 
*/ - ctx->vm = &dev_priv->mm.aliasing_ppgtt->base; - i915_ppgtt_get(dev_priv->mm.aliasing_ppgtt); - } else - ctx->vm = &dev_priv->gtt.base; + } + + ctx->ppgtt = ppgtt; + } return ctx; @@ -543,7 +531,6 @@ static int do_switch(struct intel_engine_cs *ring, { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct intel_context *from = ring->last_context; - struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(to); u32 hw_flags = 0; bool uninitialized = false; int ret, i; @@ -571,8 +558,8 @@ static int do_switch(struct intel_engine_cs *ring, */ from = ring->last_context; - if (USES_FULL_PPGTT(ring->dev)) { - ret = ppgtt->switch_mm(ppgtt, ring, false); + if (to->ppgtt) { + ret = to->ppgtt->switch_mm(to->ppgtt, ring, false); if (ret) goto unpin_out; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7e04a4825ed0..1a0611bb576b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1315,8 +1315,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, i915_gem_context_reference(ctx); - vm = ctx->vm; - if (!USES_FULL_PPGTT(dev)) + if (ctx->ppgtt) + vm = &ctx->ppgtt->base; + else vm = &dev_priv->gtt.base; eb = eb_create(args); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index eab41f9390f8..fc11ac6b0373 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -967,6 +967,12 @@ static void i915_gem_record_rings(struct drm_device *dev, request = i915_gem_find_active_request(ring); if (request) { + struct i915_address_space *vm; + + vm = request->ctx && request->ctx->ppgtt ? + &request->ctx->ppgtt->base : + &dev_priv->gtt.base; + /* We need to copy these to an anonymous buffer * as the simplest method to avoid being overwritten * by userspace. 
@@ -974,9 +980,7 @@ static void i915_gem_record_rings(struct drm_device *dev, error->ring[i].batchbuffer = i915_error_object_create(dev_priv, request->batch_obj, - request->ctx ? - request->ctx->vm : - &dev_priv->gtt.base); + vm); if (HAS_BROKEN_CS_TLB(dev_priv->dev) && ring->scratch.obj) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6f1b64e01a05..6b5f416b5c0d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -904,7 +904,7 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf) { struct drm_i915_gem_object *ring_obj = ringbuf->obj; - struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx); + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; struct page *page; uint32_t *reg_state; int ret; -- GitLab From d624d86e1e3b69cadb2dad42588e71e9a3b6d70a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:54 +0200 Subject: [PATCH 0146/1868] drm/i915: Drop create_vm argument to i915_gem_create_context Now that all the flow is streamlined the rule is simple: We create a new ppgtt for a new context when we have full ppgtt enabled. 
Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 206bf2d6c554..9683e62ec61a 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -233,8 +233,7 @@ __create_hw_context(struct drm_device *dev, */ static struct intel_context * i915_gem_create_context(struct drm_device *dev, - struct drm_i915_file_private *file_priv, - bool create_vm) + struct drm_i915_file_private *file_priv) { const bool is_global_default_ctx = file_priv == NULL; struct intel_context *ctx; @@ -262,7 +261,7 @@ i915_gem_create_context(struct drm_device *dev, } } - if (create_vm) { + if (USES_FULL_PPGTT(dev)) { struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv); if (IS_ERR_OR_NULL(ppgtt)) { @@ -345,7 +344,7 @@ int i915_gem_context_init(struct drm_device *dev) } } - ctx = i915_gem_create_context(dev, NULL, USES_FULL_PPGTT(dev)); + ctx = i915_gem_create_context(dev, NULL); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context (error %ld)\n", PTR_ERR(ctx)); @@ -444,7 +443,7 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) idr_init(&file_priv->context_idr); mutex_lock(&dev->struct_mutex); - ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev)); + ctx = i915_gem_create_context(dev, file_priv); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) { @@ -702,7 +701,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev)); + ctx = i915_gem_create_context(dev, file_priv); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); -- GitLab From 19dd120ceee085dbac70b1b01bd09d599cf87bd0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:55 +0200 
Subject: [PATCH 0147/1868] drm/i915: Extract common cleanup into i915_ppgtt_release Address space cleanup isn't really a job for the low-level cleanup callbacks. Without this change we can't reuse the low-level cleanup callback for the aliasing ppgtt cleanup. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b033ff770ac4..c1e2d9a9f9e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -391,9 +391,6 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); - list_del(&vm->global_link); - drm_mm_takedown(&vm->mm); - gen8_ppgtt_unmap_pages(ppgtt); gen8_ppgtt_free(ppgtt); } @@ -974,8 +971,6 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); - list_del(&vm->global_link); - drm_mm_takedown(&ppgtt->base.mm); drm_mm_remove_node(&ppgtt->node); gen6_ppgtt_unmap_pages(ppgtt); @@ -1226,6 +1221,9 @@ void i915_ppgtt_release(struct kref *kref) WARN_ON(!list_empty(&ppgtt->base.active_list)); WARN_ON(!list_empty(&ppgtt->base.inactive_list)); + list_del(&ppgtt->base.global_link); + drm_mm_takedown(&ppgtt->base.mm); + ppgtt->base.cleanup(&ppgtt->base); kfree(ppgtt); } -- GitLab From 90d0a0e8d0e64c92c4a6147f3c7cdc7c544d6b1a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:56 +0200 Subject: [PATCH 0148/1868] drm/i915: Extract common global gtt cleanup code We want to move the aliasing ppgtt cleanup back into the global gtt cleanup code for symmetry, but first we need to create such a place.
Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 20 ++++++++++++-------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 895f9f2f35ea..c9af48503f76 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1821,7 +1821,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) arch_phys_wc_del(dev_priv->gtt.mtrr); io_mapping_free(dev_priv->gtt.mappable); out_gtt: - dev_priv->gtt.base.cleanup(&dev_priv->gtt.base); + i915_global_gtt_cleanup(dev); out_regs: intel_uncore_fini(dev); pci_iounmap(dev->pdev, dev_priv->regs); @@ -1920,7 +1920,7 @@ int i915_driver_unload(struct drm_device *dev) destroy_workqueue(dev_priv->wq); pm_qos_remove_request(&dev_priv->pm_qos); - dev_priv->gtt.base.cleanup(&dev_priv->gtt.base); + i915_global_gtt_cleanup(dev); intel_uncore_fini(dev); if (dev_priv->regs != NULL) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c1e2d9a9f9e2..8d94e3582045 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1767,6 +1767,18 @@ void i915_gem_init_global_gtt(struct drm_device *dev) i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); } +void i915_global_gtt_cleanup(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_address_space *vm = &dev_priv->gtt.base; + + if (drm_mm_initialized(&vm->mm)) { + drm_mm_takedown(&vm->mm); + list_del(&vm->global_link); + } + + vm->cleanup(vm); +} static int setup_scratch_page(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -2035,10 +2047,6 @@ static void gen6_gmch_remove(struct i915_address_space *vm) struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base); - if (drm_mm_initialized(&vm->mm)) 
{ - drm_mm_takedown(&vm->mm); - list_del(&vm->global_link); - } iounmap(gtt->gsm); teardown_scratch_page(vm->dev); } @@ -2071,10 +2079,6 @@ static int i915_gmch_probe(struct drm_device *dev, static void i915_gmch_remove(struct i915_address_space *vm) { - if (drm_mm_initialized(&vm->mm)) { - drm_mm_takedown(&vm->mm); - list_del(&vm->global_link); - } intel_gmch_remove(); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 98fbb7309ea5..6280648d4805 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -273,6 +273,7 @@ int i915_gem_gtt_init(struct drm_device *dev); void i915_gem_init_global_gtt(struct drm_device *dev); int i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, unsigned long mappable_end, unsigned long end); +void i915_global_gtt_cleanup(struct drm_device *dev); int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); -- GitLab From 70e32544aa4027b4c27226da32eb3866e7bbbcdc Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:57 +0200 Subject: [PATCH 0149/1868] drm/i915: Cleanup aliasing ppgtt alongside the global gtt Also remove related WARN_ONs which seem to have been hit for a rather long time.
But apparently no one noticed since our module reload is already WARNING-infested :( Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 4 ---- drivers/gpu/drm/i915/i915_gem_gtt.c | 7 +++++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index c9af48503f76..04dd6112865e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1388,7 +1388,6 @@ static int i915_load_modeset_init(struct drm_device *dev) i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); mutex_unlock(&dev->struct_mutex); - WARN_ON(dev_priv->mm.aliasing_ppgtt); cleanup_irq: drm_irq_uninstall(dev); cleanup_gem_stolen: @@ -1901,7 +1900,6 @@ int i915_driver_unload(struct drm_device *dev) mutex_lock(&dev->struct_mutex); i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); - WARN_ON(dev_priv->mm.aliasing_ppgtt); mutex_unlock(&dev->struct_mutex); i915_gem_cleanup_stolen(dev); @@ -1909,8 +1907,6 @@ int i915_driver_unload(struct drm_device *dev) i915_free_hws(dev); } - WARN_ON(!list_empty(&dev_priv->vm_list)); - drm_vblank_cleanup(dev); intel_teardown_gmbus(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8d94e3582045..d97b280861ee 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1772,6 +1772,12 @@ void i915_global_gtt_cleanup(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct i915_address_space *vm = &dev_priv->gtt.base; + if (dev_priv->mm.aliasing_ppgtt) { + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + + ppgtt->base.cleanup(&ppgtt->base); + } + if (drm_mm_initialized(&vm->mm)) { drm_mm_takedown(&vm->mm); list_del(&vm->global_link); @@ -1779,6 +1785,7 @@ void i915_global_gtt_cleanup(struct drm_device *dev) vm->cleanup(vm); } + static int setup_scratch_page(struct drm_device *dev) { struct
drm_i915_private *dev_priv = dev->dev_private; -- GitLab From a08a42ad441e113f87308e0844049cb881f1ac1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Aug 2014 19:39:52 +0300 Subject: [PATCH 0150/1868] drm/i915: Don't try to enable cursor from setplane when crtc is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the cursor gets fully clipped when enabling it on a disabled crtc via setplane. This will prevent the lower level code from attempting to enable the cursor in hardware. Cc: Paulo Zanoni Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1bd1aa21a8e9..a93d5ffd3863 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11743,8 +11743,8 @@ intel_cursor_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, }; const struct drm_rect clip = { /* integer pixels */ - .x2 = intel_crtc->config.pipe_src_w, - .y2 = intel_crtc->config.pipe_src_h, + .x2 = intel_crtc->active ? intel_crtc->config.pipe_src_w : 0, + .y2 = intel_crtc->active ? intel_crtc->config.pipe_src_h : 0, }; bool visible; int ret; -- GitLab From d7ce484eeec43079ad842f1d351f53998ed6bb30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Aug 2014 19:39:53 +0300 Subject: [PATCH 0151/1868] drm/i915: Move CURSIZE setup to i845_update_cursor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CURSIZE register exists on 845/865 only, so move it to i845_update_cursor(). Changes to cursor size must be done only when the cursor is disabled, so do the write just before enabling the cursor. 
Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a93d5ffd3863..cd3dc3b72798 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8070,6 +8070,7 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base) CURSOR_GAMMA_ENABLE | CURSOR_FORMAT_ARGB); if (intel_crtc->cursor_cntl != cntl) { + I915_WRITE(CURSIZE, (64 << 12) | 64); I915_WRITE(_CURACNTR, cntl); POSTING_READ(_CURACNTR); intel_crtc->cursor_cntl = cntl; @@ -8219,7 +8220,6 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, uint32_t width, uint32_t height) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; unsigned old_width; @@ -8292,9 +8292,6 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, addr = obj->phys_handle->busaddr; } - if (IS_GEN2(dev)) - I915_WRITE(CURSIZE, (height << 12) | width); - finish: if (intel_crtc->cursor_bo) { if (!INTEL_INFO(dev)->cursor_needs_physical) -- GitLab From 8ac5466926daef2406f7b25e9a272567cb81adb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Aug 2014 19:39:54 +0300 Subject: [PATCH 0152/1868] drm/i915: Unify ivb_update_cursor() and i9xx_update_cursor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ever since commit 5efb3e2838536832c9b6872512e6b6daf592cee9 Author: Ville Syrjälä Date: Wed Apr 9 13:28:53 2014 +0300 drm/i915/chv: Add cursor pipe offsets the only difference between i9xx_update_cursor() and ivb_update_cursor() was the hsw+ pipe csc handling. Let's unify them and we can rid ourselves of some duplicated code.
Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 41 +--------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index cd3dc3b72798..b3fb127131f0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8104,43 +8104,6 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base) } cntl |= pipe << 28; /* Connect to correct pipe */ } - if (intel_crtc->cursor_cntl != cntl) { - I915_WRITE(CURCNTR(pipe), cntl); - POSTING_READ(CURCNTR(pipe)); - intel_crtc->cursor_cntl = cntl; - } - - /* and commit changes on next vblank */ - I915_WRITE(CURBASE(pipe), base); - POSTING_READ(CURBASE(pipe)); -} - -static void ivb_update_cursor(struct drm_crtc *crtc, u32 base) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; - uint32_t cntl; - - cntl = 0; - if (base) { - cntl = MCURSOR_GAMMA_ENABLE; - switch (intel_crtc->cursor_width) { - case 64: - cntl |= CURSOR_MODE_64_ARGB_AX; - break; - case 128: - cntl |= CURSOR_MODE_128_ARGB_AX; - break; - case 256: - cntl |= CURSOR_MODE_256_ARGB_AX; - break; - default: - WARN_ON(1); - return; - } - } if (IS_HASWELL(dev) || IS_BROADWELL(dev)) cntl |= CURSOR_PIPE_CSC_ENABLE; @@ -8199,9 +8162,7 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, I915_WRITE(CURPOS(pipe), pos); - if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev) || IS_BROADWELL(dev)) - ivb_update_cursor(crtc, base); - else if (IS_845G(dev) || IS_I865G(dev)) + if (IS_845G(dev) || IS_I865G(dev)) i845_update_cursor(crtc, base); else i9xx_update_cursor(crtc, base); -- GitLab From dc41c154ffc30afb7ee7e891140dead26fce5c39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 13 Aug 2014 11:57:05 +0300 Subject: 
[PATCH 0153/1868] drm/i915: Add support for variable cursor size on 845/865 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 845/865 support different cursor sizes as well, albeit a bit differently than later platforms. Add the necessary code to make them work. Untested due to lack of hardware. v2: Warn but accept invalid stride (Chris) Rewrite the cursor size checks for other platforms (Chris) v3: More polish and magic to the cursor size checks (Chris) v4: Moar polish and a comment (Chris) Reviewed-by: Chris Wilson Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 3 +- drivers/gpu/drm/i915/intel_display.c | 111 +++++++++++++++++++++------ drivers/gpu/drm/i915/intel_drv.h | 1 + 3 files changed, 91 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f79c20d49d99..203062e93452 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4128,7 +4128,8 @@ enum punit_power_well { /* Old style CUR*CNTR flags (desktop 8xx) */ #define CURSOR_ENABLE 0x80000000 #define CURSOR_GAMMA_ENABLE 0x40000000 -#define CURSOR_STRIDE_MASK 0x30000000 +#define CURSOR_STRIDE_SHIFT 28 +#define CURSOR_STRIDE(x) ((ffs(x)-9) << CURSOR_STRIDE_SHIFT) /* 256,512,1k,2k */ #define CURSOR_PIPE_CSC_ENABLE (1<<24) #define CURSOR_FORMAT_SHIFT 24 #define CURSOR_FORMAT_MASK (0x07 << CURSOR_FORMAT_SHIFT) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b3fb127131f0..fd15601f6360 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8047,30 +8047,55 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base) struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t cntl; + uint32_t cntl = 0, size = 0; - if (base != intel_crtc->cursor_base) { - 
/* On these chipsets we can only modify the base whilst - * the cursor is disabled. - */ - if (intel_crtc->cursor_cntl) { - I915_WRITE(_CURACNTR, 0); - POSTING_READ(_CURACNTR); - intel_crtc->cursor_cntl = 0; + if (base) { + unsigned int width = intel_crtc->cursor_width; + unsigned int height = intel_crtc->cursor_height; + unsigned int stride = roundup_pow_of_two(width) * 4; + + switch (stride) { + default: + WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n", + width, stride); + stride = 256; + /* fallthrough */ + case 256: + case 512: + case 1024: + case 2048: + break; } + cntl |= CURSOR_ENABLE | + CURSOR_GAMMA_ENABLE | + CURSOR_FORMAT_ARGB | + CURSOR_STRIDE(stride); + + size = (height << 12) | width; + } + + if (intel_crtc->cursor_cntl != 0 && + (intel_crtc->cursor_base != base || + intel_crtc->cursor_size != size || + intel_crtc->cursor_cntl != cntl)) { + /* On these chipsets we can only modify the base/size/stride + * whilst the cursor is disabled. + */ + I915_WRITE(_CURACNTR, 0); + POSTING_READ(_CURACNTR); + intel_crtc->cursor_cntl = 0; + } + + if (intel_crtc->cursor_base != base) I915_WRITE(_CURABASE, base); - POSTING_READ(_CURABASE); + + if (intel_crtc->cursor_size != size) { + I915_WRITE(CURSIZE, size); + intel_crtc->cursor_size = size; } - /* XXX width must be 64, stride 256 => 0x00 << 28 */ - cntl = 0; - if (base) - cntl = (CURSOR_ENABLE | - CURSOR_GAMMA_ENABLE | - CURSOR_FORMAT_ARGB); if (intel_crtc->cursor_cntl != cntl) { - I915_WRITE(CURSIZE, (64 << 12) | 64); I915_WRITE(_CURACNTR, cntl); POSTING_READ(_CURACNTR); intel_crtc->cursor_cntl = cntl; @@ -8169,6 +8194,43 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, intel_crtc->cursor_base = base; } +static bool cursor_size_ok(struct drm_device *dev, + uint32_t width, uint32_t height) +{ + if (width == 0 || height == 0) + return false; + + /* + * 845g/865g are special in that they are only limited by + * the width of their cursors, the height is arbitrary up to + * the 
precision of the register. Everything else requires + * square cursors, limited to a few power-of-two sizes. + */ + if (IS_845G(dev) || IS_I865G(dev)) { + if ((width & 63) != 0) + return false; + + if (width > (IS_845G(dev) ? 64 : 512)) + return false; + + if (height > 1023) + return false; + } else { + switch (width | height) { + case 256: + case 128: + if (IS_GEN2(dev)) + return false; + case 64: + break; + default: + return false; + } + } + + return true; +} + /* * intel_crtc_cursor_set_obj - Set cursor to specified GEM object * @@ -8183,7 +8245,7 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; - unsigned old_width; + unsigned old_width, stride; uint32_t addr; int ret; @@ -8197,14 +8259,13 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, } /* Check for which cursor types we support */ - if (!((width == 64 && height == 64) || - (width == 128 && height == 128 && !IS_GEN2(dev)) || - (width == 256 && height == 256 && !IS_GEN2(dev)))) { + if (!cursor_size_ok(dev, width, height)) { DRM_DEBUG("Cursor dimension not supported\n"); return -EINVAL; } - if (obj->base.size < width * height * 4) { + stride = roundup_pow_of_two(width) * 4; + if (obj->base.size < stride * height) { DRM_DEBUG_KMS("buffer is too small\n"); ret = -ENOMEM; goto fail; @@ -11797,6 +11858,7 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_crtc->cursor_base = ~0; intel_crtc->cursor_cntl = ~0; + intel_crtc->cursor_size = ~0; BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); @@ -12585,7 +12647,10 @@ void intel_modeset_init(struct drm_device *dev) dev->mode_config.max_height = 8192; } - if (IS_GEN2(dev)) { + if (IS_845G(dev) || IS_I865G(dev)) { + dev->mode_config.cursor_width = IS_845G(dev) ? 
64 : 512; + dev->mode_config.cursor_height = 1023; + } else if (IS_GEN2(dev)) { dev->mode_config.cursor_width = GEN2_CURSOR_WIDTH; dev->mode_config.cursor_height = GEN2_CURSOR_HEIGHT; } else { diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 666ca8a044ea..579b1d40f934 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -405,6 +405,7 @@ struct intel_crtc { uint32_t cursor_addr; int16_t cursor_width, cursor_height; uint32_t cursor_cntl; + uint32_t cursor_size; uint32_t cursor_base; struct intel_plane_config plane_config; -- GitLab From 7312e2ddec1ffe4511a85a2814df44e79ded3c1d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Aug 2014 12:14:12 +0100 Subject: [PATCH 0154/1868] drm/i915: Replace __I915__ with typesafe variant Ville pointed out the GCCism __builtin_types_compatible_p() that we could use to replace our heavily casted presumption __I915__ macro that was based on comparing struct sizes. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 --- drivers/gpu/drm/i915/i915_drv.h | 12 ++++++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 04dd6112865e..db56b26a08c9 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1595,9 +1595,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!drm_core_check_feature(dev, DRIVER_MODESET) && !dev->agp) return -EINVAL; - /* For the ugly agnostic INTEL_INFO macro */ - BUILD_BUG_ON(sizeof(*dev_priv) == sizeof(*dev)); - dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); if (dev_priv == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eb99a109c0bc..2af0071efb38 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2004,8 +2004,16 @@ struct drm_i915_cmd_table { }; /* Note 
that the (struct drm_i915_private *) cast is just to shut up gcc. */ -#define __I915__(p) ((sizeof(*(p)) == sizeof(struct drm_i915_private)) ? \ - (struct drm_i915_private *)(p) : to_i915(p)) +#define __I915__(p) ({ \ + struct drm_i915_private *__p; \ + if (__builtin_types_compatible_p(typeof(*p), struct drm_i915_private)) \ + __p = (struct drm_i915_private *)p; \ + else if (__builtin_types_compatible_p(typeof(*p), struct drm_device)) \ + __p = to_i915((struct drm_device *)p); \ + else \ + BUILD_BUG(); \ + __p; \ +}) #define INTEL_INFO(p) (&__I915__(p)->info) #define INTEL_DEVID(p) (INTEL_INFO(p)->device_id) -- GitLab From 82e3b8c130f046b7dd1e7898c10e40edb52fee6d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Aug 2014 13:09:46 +0100 Subject: [PATCH 0155/1868] drm/i915: Localise the fbdev console lock frobbing Rather than take and release the console_lock() around a non-existent DRM_I915_FBDEV, move the lock acquisition into the callee where it will be compiled out by the config option entirely. This includes moving the deferred fb_set_suspend() dance and encapsulating it entirely within intel_fbdev.c. v2: Use an integral work item so that we can explicitly flush the work upon suspend/unload. Signed-off-by: Chris Wilson Cc: Daniel Vetter [danvet: Add the flush_work in fbdev_fini per the mailing list discussion. And s/BUG_ON/WARN_ON/ because.]
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 -- drivers/gpu/drm/i915/i915_drv.c | 28 ++----------------- drivers/gpu/drm/i915/i915_drv.h | 9 +----- drivers/gpu/drm/i915/intel_drv.h | 4 +-- drivers/gpu/drm/i915/intel_fbdev.c | 44 +++++++++++++++++++++++++++++- 5 files changed, 48 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index db56b26a08c9..3f676f904f7e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1350,8 +1350,6 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_irq; - INIT_WORK(&dev_priv->console_resume_work, intel_console_resume); - intel_modeset_gem_init(dev); /* Always safe in the mode setting case. */ @@ -1864,7 +1862,6 @@ int i915_driver_unload(struct drm_device *dev) if (drm_core_check_feature(dev, DRIVER_MODESET)) { intel_fbdev_fini(dev); intel_modeset_cleanup(dev); - cancel_work_sync(&dev_priv->console_resume_work); /* * free the memory space allocated for the child device diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ec96f9a9724c..01de97776d81 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -558,9 +558,7 @@ static int i915_drm_freeze(struct drm_device *dev) intel_uncore_forcewake_reset(dev, false); intel_opregion_fini(dev); - console_lock(); - intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED); - console_unlock(); + intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED, true); dev_priv->suspend_count++; @@ -599,18 +597,6 @@ int i915_suspend(struct drm_device *dev, pm_message_t state) return 0; } -void intel_console_resume(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, - console_resume_work); - struct drm_device *dev = dev_priv->dev; - - console_lock(); - intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING); - console_unlock(); -} - static int 
i915_drm_thaw_early(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -681,17 +667,7 @@ static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings) intel_opregion_init(dev); - /* - * The console lock can be pretty contented on resume due - * to all the printk activity. Try to keep it out of the hot - * path of resume if possible. - */ - if (console_trylock()) { - intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING); - console_unlock(); - } else { - schedule_work(&dev_priv->console_resume_work); - } + intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING, false); mutex_lock(&dev_priv->modeset_restore_lock); dev_priv->modeset_restore = MODESET_DONE; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2af0071efb38..541fb6f295bb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1586,14 +1586,9 @@ struct drm_i915_private { #ifdef CONFIG_DRM_I915_FBDEV /* list of fbdev register on this device */ struct intel_fbdev *fbdev; + struct work_struct fbdev_suspend_work; #endif - /* - * The console may be contended at resume, but we don't - * want it to block on it. 
- */ - struct work_struct console_resume_work; - struct drm_property *broadcast_rgb_property; struct drm_property *force_audio_property; @@ -2225,8 +2220,6 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); -extern void intel_console_resume(struct work_struct *work); - /* i915_irq.c */ void i915_queue_hangcheck(struct drm_device *dev); __printf(3, 4) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 579b1d40f934..4ab0d928b819 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -948,7 +948,7 @@ void intel_dvo_init(struct drm_device *dev); extern int intel_fbdev_init(struct drm_device *dev); extern void intel_fbdev_initial_config(struct drm_device *dev); extern void intel_fbdev_fini(struct drm_device *dev); -extern void intel_fbdev_set_suspend(struct drm_device *dev, int state); +extern void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous); extern void intel_fbdev_output_poll_changed(struct drm_device *dev); extern void intel_fbdev_restore_mode(struct drm_device *dev); #else @@ -965,7 +965,7 @@ static inline void intel_fbdev_fini(struct drm_device *dev) { } -static inline void intel_fbdev_set_suspend(struct drm_device *dev, int state) +static inline void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous) { } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index f475414671d8..cf052a39558d 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -636,6 +637,15 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, return false; } +static void intel_fbdev_suspend_worker(struct work_struct *work) +{ + 
intel_fbdev_set_suspend(container_of(work, + struct drm_i915_private, + fbdev_suspend_work)->dev, + FBINFO_STATE_RUNNING, + true); +} + int intel_fbdev_init(struct drm_device *dev) { struct intel_fbdev *ifbdev; @@ -662,6 +672,8 @@ int intel_fbdev_init(struct drm_device *dev) } dev_priv->fbdev = ifbdev; + INIT_WORK(&dev_priv->fbdev_suspend_work, intel_fbdev_suspend_worker); + drm_fb_helper_single_add_all_connectors(&ifbdev->helper); return 0; @@ -682,12 +694,14 @@ void intel_fbdev_fini(struct drm_device *dev) if (!dev_priv->fbdev) return; + flush_work(&dev_priv->fbdev_suspend_work); + intel_fbdev_destroy(dev, dev_priv->fbdev); kfree(dev_priv->fbdev); dev_priv->fbdev = NULL; } -void intel_fbdev_set_suspend(struct drm_device *dev, int state) +void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_fbdev *ifbdev = dev_priv->fbdev; @@ -698,6 +712,33 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state) info = ifbdev->helper.fbdev; + if (synchronous) { + /* Flush any pending work to turn the console on, and then + * wait to turn it off. It must be synchronous as we are + * about to suspend or unload the driver. + * + * Note that from within the work-handler, we cannot flush + * ourselves, so only flush outstanding work upon suspend! + */ + if (state != FBINFO_STATE_RUNNING) + flush_work(&dev_priv->fbdev_suspend_work); + console_lock(); + } else { + /* + * The console lock can be pretty contented on resume due + * to all the printk activity. Try to keep it out of the hot + * path of resume if possible. + */ + WARN_ON(state != FBINFO_STATE_RUNNING); + if (!console_trylock()) { + /* Don't block our own workqueue as this can + * be run in parallel with other i915.ko tasks. + */ + schedule_work(&dev_priv->fbdev_suspend_work); + return; + } + } + /* On resume from hibernation: If the object is shmemfs backed, it has * been restored from swap. 
If the object is stolen however, it will be * full of whatever garbage was left in there. @@ -706,6 +747,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state) memset_io(info->screen_base, 0, info->screen_size); fb_set_suspend(info, state); + console_unlock(); } void intel_fbdev_output_poll_changed(struct drm_device *dev) -- GitLab From ebc3282409ae4d1e90c2f9608665cc4d8fbf7e73 Mon Sep 17 00:00:00 2001 From: Sagar Kamble Date: Wed, 13 Aug 2014 23:07:05 +0530 Subject: [PATCH 0156/1868] drm/i915: Created common handler for platform specific suspend/resume With this change, intel_runtime_suspend and intel_runtime_resume functions become completely platform agnostic. Platform specific suspend/resume changes are moved to intel_suspend_complete and intel_resume_prepare. Cc: Imre Deak Cc: Paulo Zanoni Cc: Daniel Vetter Cc: Jani Nikula Cc: Goel, Akash Signed-off-by: Sagar Kamble Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 76 +++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 01de97776d81..b06e975dba39 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -494,6 +494,10 @@ bool i915_semaphore_is_enabled(struct drm_device *dev) return true; } + +static int intel_suspend_complete(struct drm_i915_private *dev_priv); +static int intel_resume_prepare(struct drm_i915_private *dev_priv); + static int i915_drm_freeze(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -959,14 +963,14 @@ static int i915_pm_poweroff(struct device *dev) return i915_drm_freeze(drm_dev); } -static int hsw_runtime_suspend(struct drm_i915_private *dev_priv) +static int hsw_suspend_complete(struct drm_i915_private *dev_priv) { hsw_enable_pc8(dev_priv); return 0; } -static int snb_runtime_resume(struct drm_i915_private *dev_priv) +static int snb_resume_prepare(struct 
drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; @@ -975,7 +979,7 @@ static int snb_runtime_resume(struct drm_i915_private *dev_priv) return 0; } -static int hsw_runtime_resume(struct drm_i915_private *dev_priv) +static int hsw_resume_prepare(struct drm_i915_private *dev_priv) { hsw_disable_pc8(dev_priv); @@ -1271,7 +1275,7 @@ static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv) I915_WRITE(VLV_GTLC_PW_STATUS, VLV_GTLC_ALLOWWAKEERR); } -static int vlv_runtime_suspend(struct drm_i915_private *dev_priv) +static int vlv_suspend_complete(struct drm_i915_private *dev_priv) { u32 mask; int err; @@ -1311,7 +1315,7 @@ static int vlv_runtime_suspend(struct drm_i915_private *dev_priv) return err; } -static int vlv_runtime_resume(struct drm_i915_private *dev_priv) +static int vlv_resume_prepare(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; int err; @@ -1389,17 +1393,7 @@ static int intel_runtime_suspend(struct device *device) cancel_work_sync(&dev_priv->rps.work); intel_runtime_pm_disable_interrupts(dev); - if (IS_GEN6(dev)) { - ret = 0; - } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { - ret = hsw_runtime_suspend(dev_priv); - } else if (IS_VALLEYVIEW(dev)) { - ret = vlv_runtime_suspend(dev_priv); - } else { - ret = -ENODEV; - WARN_ON(1); - } - + ret = intel_suspend_complete(dev_priv); if (ret) { DRM_ERROR("Runtime suspend failed, disabling it (%d)\n", ret); intel_runtime_pm_restore_interrupts(dev); @@ -1437,17 +1431,7 @@ static int intel_runtime_resume(struct device *device) intel_opregion_notify_adapter(dev, PCI_D0); dev_priv->pm.suspended = false; - if (IS_GEN6(dev)) { - ret = snb_runtime_resume(dev_priv); - } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { - ret = hsw_runtime_resume(dev_priv); - } else if (IS_VALLEYVIEW(dev)) { - ret = vlv_runtime_resume(dev_priv); - } else { - WARN_ON(1); - ret = -ENODEV; - } - + ret = intel_resume_prepare(dev_priv); /* * No point of rolling back things in case of an 
error, as the best * we can do is to hope that things will still work (and disable RPM). @@ -1466,6 +1450,44 @@ static int intel_runtime_resume(struct device *device) return ret; } +static int intel_suspend_complete(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + int ret; + + if (IS_GEN6(dev)) { + ret = 0; + } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + ret = hsw_suspend_complete(dev_priv); + } else if (IS_VALLEYVIEW(dev)) { + ret = vlv_suspend_complete(dev_priv); + } else { + ret = -ENODEV; + WARN_ON(1); + } + + return ret; +} + +static int intel_resume_prepare(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + int ret; + + if (IS_GEN6(dev)) { + ret = snb_resume_prepare(dev_priv); + } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + ret = hsw_resume_prepare(dev_priv); + } else if (IS_VALLEYVIEW(dev)) { + ret = vlv_resume_prepare(dev_priv); + } else { + WARN_ON(1); + ret = -ENODEV; + } + + return ret; +} + static const struct dev_pm_ops i915_pm_ops = { .suspend = i915_pm_suspend, .suspend_late = i915_pm_suspend_late, -- GitLab From 016970beb05da6285c2f3ed2bee1c676cb75972e Mon Sep 17 00:00:00 2001 From: Sagar Kamble Date: Wed, 13 Aug 2014 23:07:06 +0530 Subject: [PATCH 0157/1868] drm/i915: Sharing platform specific sequence between runtime and system suspend/ resume paths On VLV, post S0i3 during i915_drm_thaw following issue is observed during ring initialization. 
[ 335.604039] [drm:stop_ring] ERROR render ring :timed out trying to stop ring [ 336.607340] [drm:stop_ring] ERROR render ring :timed out trying to stop ring [ 336.607345] [drm:init_ring_common] ERROR failed to set render ring head to zero ctl 00000000 head 00000000 tail 00000000 start 00000000 [ 337.610645] [drm:stop_ring] ERROR bsd ring :timed out trying to stop ring [ 338.613952] [drm:stop_ring] ERROR bsd ring :timed out trying to stop ring [ 338.613956] [drm:init_ring_common] ERROR failed to set bsd ring head to zero ctl 00000000 head 00000000 tail 00000000 start 00000000 [ 339.617256] [drm:stop_ring] ERROR render ring :timed out trying to stop ring [ 339.617258] -----------[ cut here ]----------- [ 339.617267] WARNING: CPU: 0 PID: 6 at drivers/gpu/drm/i915/intel_ringbuffer.c:1666 intel_cleanup_ring+0xe6/0xf0() [ 339.617396] --[ end trace 5ef5ed1a3c92e2a6 ]-- [ 339.617428] [drm:__i915_drm_thaw] ERROR failed to re-initialize GPU, declaring wedged! This is happening since wake is not enabled and Gunit registers are not restored. For this system suspend/resume paths need to follow save/restore and additional platform specific setup in suspend_complete and resume_prepare. suspend_complete is shared unconditionally for VLV, HSW, BDW. resume_prepare for HSW and BDW has pc8 disabling which is needed during thaw_early so sharing unconditionally. For VLV and SNB runtime resume specific sequence exists. 
Cc: Imre Deak Cc: Paulo Zanoni Cc: Daniel Vetter Cc: Jani Nikula Cc: Goel, Akash Signed-off-by: Sagar Kamble Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 63 ++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b06e975dba39..2f112853c36f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -496,7 +496,8 @@ bool i915_semaphore_is_enabled(struct drm_device *dev) static int intel_suspend_complete(struct drm_i915_private *dev_priv); -static int intel_resume_prepare(struct drm_i915_private *dev_priv); +static int intel_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume); static int i915_drm_freeze(struct drm_device *dev) { @@ -604,15 +605,17 @@ int i915_suspend(struct drm_device *dev, pm_message_t state) static int i915_drm_thaw_early(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + int ret; - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - hsw_disable_pc8(dev_priv); + ret = intel_resume_prepare(dev_priv, false); + if (ret) + DRM_ERROR("Resume prepare failed: %d,Continuing resume\n", ret); intel_uncore_early_sanitize(dev, true); intel_uncore_sanitize(dev); intel_power_domains_init_hw(dev_priv); - return 0; + return ret; } static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings) @@ -888,6 +891,7 @@ static int i915_pm_suspend_late(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct drm_device *drm_dev = pci_get_drvdata(pdev); struct drm_i915_private *dev_priv = drm_dev->dev_private; + int ret; /* * We have a suspedn ordering issue with the snd-hda driver also @@ -901,13 +905,16 @@ static int i915_pm_suspend_late(struct device *dev) if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - if (IS_HASWELL(drm_dev) || IS_BROADWELL(drm_dev)) - hsw_enable_pc8(dev_priv); + ret = 
intel_suspend_complete(dev_priv); - pci_disable_device(pdev); - pci_set_power_state(pdev, PCI_D3hot); + if (ret) + DRM_ERROR("Suspend complete failed: %d\n", ret); + else { + pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3hot); + } - return 0; + return ret; } static int i915_pm_resume_early(struct device *dev) @@ -970,16 +977,19 @@ static int hsw_suspend_complete(struct drm_i915_private *dev_priv) return 0; } -static int snb_resume_prepare(struct drm_i915_private *dev_priv) +static int snb_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume) { struct drm_device *dev = dev_priv->dev; - intel_init_pch_refclk(dev); + if (rpm_resume) + intel_init_pch_refclk(dev); return 0; } -static int hsw_resume_prepare(struct drm_i915_private *dev_priv) +static int hsw_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume) { hsw_disable_pc8(dev_priv); @@ -1315,7 +1325,8 @@ static int vlv_suspend_complete(struct drm_i915_private *dev_priv) return err; } -static int vlv_resume_prepare(struct drm_i915_private *dev_priv) +static int vlv_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume) { struct drm_device *dev = dev_priv->dev; int err; @@ -1340,8 +1351,10 @@ static int vlv_resume_prepare(struct drm_i915_private *dev_priv) vlv_check_no_gt_access(dev_priv); - intel_init_clock_gating(dev); - i915_gem_restore_fences(dev); + if (rpm_resume) { + intel_init_clock_gating(dev); + i915_gem_restore_fences(dev); + } return ret; } @@ -1431,7 +1444,7 @@ static int intel_runtime_resume(struct device *device) intel_opregion_notify_adapter(dev, PCI_D0); dev_priv->pm.suspended = false; - ret = intel_resume_prepare(dev_priv); + ret = intel_resume_prepare(dev_priv, true); /* * No point of rolling back things in case of an error, as the best * we can do is to hope that things will still work (and disable RPM). 
@@ -1450,6 +1463,10 @@ static int intel_runtime_resume(struct device *device) return ret; } +/* + * This function implements common functionality of runtime and system + * suspend sequence. + */ static int intel_suspend_complete(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; @@ -1469,17 +1486,23 @@ static int intel_suspend_complete(struct drm_i915_private *dev_priv) return ret; } -static int intel_resume_prepare(struct drm_i915_private *dev_priv) +/* + * This function implements common functionality of runtime and system + * resume sequence. Variable rpm_resume used for implementing different + * code paths. + */ +static int intel_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume) { struct drm_device *dev = dev_priv->dev; int ret; if (IS_GEN6(dev)) { - ret = snb_resume_prepare(dev_priv); + ret = snb_resume_prepare(dev_priv, rpm_resume); } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { - ret = hsw_resume_prepare(dev_priv); + ret = hsw_resume_prepare(dev_priv, rpm_resume); } else if (IS_VALLEYVIEW(dev)) { - ret = vlv_resume_prepare(dev_priv); + ret = vlv_resume_prepare(dev_priv, rpm_resume); } else { WARN_ON(1); ret = -ENODEV; -- GitLab From 3a448734902359113b0c7c3454ce4cd56dc1e61f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Aug 2014 20:05:47 +0100 Subject: [PATCH 0158/1868] drm/i915: Print captured bo for all VM in error state The current error state harks back to the era of just a single VM. For full-ppgtt, we capture every bo on every VM. It behoves us to then print every bo for every VM, which we currently fail to do and so miss vital information in the error state. v2: Use the vma address rather than -1! 
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gpu_error.c | 80 +++++++++++++++++++-------- 2 files changed, 58 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 541fb6f295bb..ed52ac744105 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -396,6 +396,7 @@ struct drm_i915_error_state { pid_t pid; char comm[TASK_COMM_LEN]; } ring[I915_NUM_RINGS]; + struct drm_i915_error_buffer { u32 size; u32 name; @@ -414,6 +415,7 @@ struct drm_i915_error_state { } **active_bo, **pinned_bo; u32 *active_bo_count, *pinned_bo_count; + u32 vm_count; }; struct intel_connector; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index fc11ac6b0373..35e70d5d6282 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -192,10 +192,10 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, struct drm_i915_error_buffer *err, int count) { - err_printf(m, "%s [%d]:\n", name, count); + err_printf(m, " %s [%d]:\n", name, count); while (count--) { - err_printf(m, " %08x %8u %02x %02x %x %x", + err_printf(m, " %08x %8u %02x %02x %x %x", err->gtt_offset, err->size, err->read_domains, @@ -393,15 +393,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, i915_ring_error_state(m, dev, &error->ring[i]); } - if (error->active_bo) + for (i = 0; i < error->vm_count; i++) { + err_printf(m, "vm[%d]\n", i); + print_error_buffers(m, "Active", - error->active_bo[0], - error->active_bo_count[0]); + error->active_bo[i], + error->active_bo_count[i]); - if (error->pinned_bo) print_error_buffers(m, "Pinned", - error->pinned_bo[0], - error->pinned_bo_count[0]); + error->pinned_bo[i], + error->pinned_bo_count[i]); + } for (i = 0; i < ARRAY_SIZE(error->ring); i++) { obj = error->ring[i].batchbuffer; @@ 
-644,13 +646,15 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv, (src)->base.size>>PAGE_SHIFT) static void capture_bo(struct drm_i915_error_buffer *err, - struct drm_i915_gem_object *obj) + struct i915_vma *vma) { + struct drm_i915_gem_object *obj = vma->obj; + err->size = obj->base.size; err->name = obj->base.name; err->rseqno = obj->last_read_seqno; err->wseqno = obj->last_write_seqno; - err->gtt_offset = i915_gem_obj_ggtt_offset(obj); + err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; err->fence_reg = obj->fence_reg; @@ -674,7 +678,7 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err, int i = 0; list_for_each_entry(vma, head, mm_list) { - capture_bo(err++, vma->obj); + capture_bo(err++, vma); if (++i == count) break; } @@ -683,21 +687,27 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err, } static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, - int count, struct list_head *head) + int count, struct list_head *head, + struct i915_address_space *vm) { struct drm_i915_gem_object *obj; - int i = 0; + struct drm_i915_error_buffer * const first = err; + struct drm_i915_error_buffer * const last = err + count; list_for_each_entry(obj, head, global_list) { - if (!i915_gem_obj_is_pinned(obj)) - continue; + struct i915_vma *vma; - capture_bo(err++, obj); - if (++i == count) + if (err == last) break; + + list_for_each_entry(vma, &obj->vma_list, vma_link) + if (vma->vm == vm && vma->pin_count > 0) { + capture_bo(err++, vma); + break; + } } - return i; + return err - first; } /* Generate a semi-unique error code. 
The code is not meant to have meaning, The @@ -1053,9 +1063,14 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, list_for_each_entry(vma, &vm->active_list, mm_list) i++; error->active_bo_count[ndx] = i; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) - if (i915_gem_obj_is_pinned(obj)) - i++; + + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { + list_for_each_entry(vma, &obj->vma_list, vma_link) + if (vma->vm == vm && vma->pin_count > 0) { + i++; + break; + } + } error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx]; if (i) { @@ -1074,7 +1089,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, error->pinned_bo_count[ndx] = capture_pinned_bo(pinned_bo, error->pinned_bo_count[ndx], - &dev_priv->mm.bound_list); + &dev_priv->mm.bound_list, vm); error->active_bo[ndx] = active_bo; error->pinned_bo[ndx] = pinned_bo; } @@ -1095,8 +1110,25 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv, error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count), GFP_ATOMIC); - list_for_each_entry(vm, &dev_priv->vm_list, global_link) - i915_gem_capture_vm(dev_priv, error, vm, i++); + if (error->active_bo == NULL || + error->pinned_bo == NULL || + error->active_bo_count == NULL || + error->pinned_bo_count == NULL) { + kfree(error->active_bo); + kfree(error->active_bo_count); + kfree(error->pinned_bo); + kfree(error->pinned_bo_count); + + error->active_bo = NULL; + error->active_bo_count = NULL; + error->pinned_bo = NULL; + error->pinned_bo_count = NULL; + } else { + list_for_each_entry(vm, &dev_priv->vm_list, global_link) + i915_gem_capture_vm(dev_priv, error, vm, i++); + + error->vm_count = cnt; + } } /* Capture all registers which don't fit into another category. 
*/ -- GitLab From 582d67f0b19afc2299bc8977aba835d8d25bb591 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:16 +0100 Subject: [PATCH 0159/1868] drm/i915: Add temporary ring->ctx backpointer The execlist patches have a bit of a convoluted and long history and due to that have the actual submission still misplaced deeply buried in the low-level ringbuffer handling code. This design goes back to the legacy ringbuffer code with its tricky lazy request and simple work submission using ring tail writes. For that reason they need a ring->ctx backpointer. The goal is to unbury that code and move it up into a level where the full execlist context is available so that we can ditch this backpointer. Until that's done make it really obvious that there's work still to be done. Cc: Oscar Mateo Cc: Thomas Daniel Acked-by: Thomas Daniel Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6b5f416b5c0d..c2352d1b23fa 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1086,6 +1086,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, } ringbuf->ring = ring; + ringbuf->FIXME_lrc_ctx = ctx; + ringbuf->size = 32 * PAGE_SIZE; ringbuf->effective_size = ringbuf->size; ringbuf->head = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 24437da91f77..26785ca72530 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -99,6 +99,13 @@ struct intel_ringbuffer { struct intel_engine_cs *ring; + /* + * FIXME: This backpointer is an artifact of the history of how the + * execlist patches came into being. It will get removed once the basic + * code has landed. 
+ */ + struct intel_context *FIXME_lrc_ctx; + u32 head; u32 tail; int space; -- GitLab From 295ee85316aedfe1878306d71b5e9c7d4498fb1b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 30 Jul 2014 14:23:44 +0200 Subject: [PATCH 0160/1868] drm: Docbook fixes Bunch of small leftovers spotted by looking at the make htmldocs output. I've left out dp mst, there's too much amiss there. v2: Also add the missing parameter docbook in the dp mst code - Dave Airlie correctly pointed out that we don't actually want kerneldoc for the missing structure members in header files. Cc: Dave Airlie Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 5 +++-- drivers/gpu/drm/drm_dp_mst_topology.c | 1 + drivers/gpu/drm/drm_edid.c | 2 +- drivers/gpu/drm/drm_modeset_lock.c | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 285e62a134b2..f09b75212081 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -3512,9 +3512,10 @@ EXPORT_SYMBOL(drm_property_create_enum); * @flags: flags specifying the property type * @name: name of the property * @props: enumeration lists with property bitflags - * @num_values: number of pre-defined values + * @num_props: size of the @props array + * @supported_bits: bitmask of all supported enumeration values * - * This creates a new generic drm property which can then be attached to a drm + * This creates a new bitmask drm property which can then be attached to a drm * object with drm_object_attach_property. The returned property object must be * freed with drm_property_destroy. 
* diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index ac3c2738db94..352f5d6c763c 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2071,6 +2071,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) * drm_dp_mst_hpd_irq() - MST hotplug IRQ notify * @mgr: manager to notify irq for. * @esi: 4 bytes from SINK_COUNT_ESI + * @handled: whether the hpd interrupt was consumed or not * * This should be called from the driver when it detects a short IRQ, * along with the value of the DEVICE_SERVICE_IRQ_VECTOR_ESI0. The diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 1dbf3bc4c6a3..f905c63c0f68 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -3433,10 +3433,10 @@ EXPORT_SYMBOL(drm_rgb_quant_range_selectable); /** * drm_assign_hdmi_deep_color_info - detect whether monitor supports * hdmi deep color modes and update drm_display_info if so. - * * @edid: monitor EDID information * @info: Updated with maximum supported deep color bpc and color format * if deep color supported. + * @connector: DRM connector, used only for debug output * * Parse the CEA extension according to CEA-861-B. * Return true if HDMI deep color supported, false if not or unknown. diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 5280b64a0230..8749fc06570e 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -199,7 +199,7 @@ EXPORT_SYMBOL(drm_modeset_lock_crtc); /** * drm_modeset_legacy_acquire_ctx - find acquire ctx for legacy ioctls - * crtc: drm crtc + * @crtc: drm crtc * * Legacy ioctl operations like cursor updates or page flips only have per-crtc * locking, and store the acquire ctx in the corresponding crtc. 
All other -- GitLab From c11cda52193dfa459dfea38f00b19bc9325fa922 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 8 Aug 2014 18:50:18 +0100 Subject: [PATCH 0161/1868] drm: Don't return 0 for a value used as a denominator Static analysis will be unhappy if a function can theoretically return 0 and we're trying to divide by that value. Mark that case that cannot occur as a BUG() instead. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_dp_mst_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 352f5d6c763c..b3adf1445020 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1772,7 +1772,7 @@ static int drm_dp_get_vc_payload_bw(int dp_link_bw, int dp_link_count) case DP_LINK_BW_5_4: return 10 * dp_link_count; } - return 0; + BUG(); } /** -- GitLab From 14f476fa24e81d0beea1aa14d763102958518d60 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 8 Aug 2014 19:15:20 +0100 Subject: [PATCH 0162/1868] drm: Use the type of the array element when reallocating Static analysers find it 'suspicious', that we're trying to allocate memory for elements of size sizeof(struct drm_fb_helper_connector) when the array is defined as struct drm_fb_helper_connector **. Use sizeof(struct drm_fb_helper_connector *) instead. Note that the structure being defined as: struct drm_fb_helper_connector { struct drm_connector *connector; }; This was still doing the right thing, but may not in the future if additional fields are added. 
Cc: Todd Previte Cc: Dave Airlie Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_fb_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 7b7b9565188f..6019392b19cc 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -126,7 +126,7 @@ int drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper, struct drm_ WARN_ON(!mutex_is_locked(&fb_helper->dev->mode_config.mutex)); if (fb_helper->connector_count + 1 > fb_helper->connector_info_alloc_count) { - temp = krealloc(fb_helper->connector_info, sizeof(struct drm_fb_helper_connector) * (fb_helper->connector_count + 1), GFP_KERNEL); + temp = krealloc(fb_helper->connector_info, sizeof(struct drm_fb_helper_connector *) * (fb_helper->connector_count + 1), GFP_KERNEL); if (!temp) return -ENOMEM; -- GitLab From 48e29f5535b9eb506c44bd8f41bd9348fd219435 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:29 +0100 Subject: [PATCH 0163/1868] drm/i915/bdw: Emission of requests with logical rings On a previous iteration of this patch, I created an Execlists version of __i915_add_request and abstracted it away as a vfunc. Daniel Vetter wondered then why that was needed: "with the clean split in command submission I expect every function to know whether it'll submit to an lrc (everything in intel_lrc.c) or whether it'll submit to a legacy ring (existing code), so I don't see a need for an add_request vfunc." The honest, hairy truth is that this patch is the glue keeping the whole logical ring puzzle together: - i915_add_request is used by intel_ring_idle, which in turn is used by i915_gpu_idle, which in turn is used in several places inside the eviction and gtt codes. - Also, it is used by i915_gem_check_olr, which is littered all over i915_gem.c - ... 
If I were to duplicate all the code that directly or indirectly uses __i915_add_request, I'll end up creating a separate driver. To show the differences between the existing legacy version and the new Execlists one, this time I have special-cased __i915_add_request instead of adding an add_request vfunc. I hope this helps to untangle this Gordian knot. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Adjust to ringbuf->FIXME_lrc_ctx per the discussion with Thomas Daniel.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 72 +++++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_lrc.c | 30 +++++++++++-- drivers/gpu/drm/i915/intel_lrc.h | 1 + 3 files changed, 80 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6c2f0b886eb0..32fa1e9eb844 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2311,10 +2311,21 @@ int __i915_add_request(struct intel_engine_cs *ring, { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct drm_i915_gem_request *request; + struct intel_ringbuffer *ringbuf; u32 request_ring_position, request_start; int ret; - request_start = intel_ring_get_tail(ring->buffer); + request = ring->preallocated_lazy_request; + if (WARN_ON(request == NULL)) + return -ENOMEM; + + if (i915.enable_execlists) { + struct intel_context *ctx = request->ctx; + ringbuf = ctx->engine[ring->id].ringbuf; + } else + ringbuf = ring->buffer; + + request_start = intel_ring_get_tail(ringbuf); /* * Emit any outstanding flushes - execbuf can fail to emit the flush * after having emitted the batchbuffer command. Hence we need to fix @@ -2322,24 +2333,32 @@ int __i915_add_request(struct intel_engine_cs *ring, * is that the flush _must_ happen before the next request, no matter * what. 
*/ - ret = intel_ring_flush_all_caches(ring); - if (ret) - return ret; - - request = ring->preallocated_lazy_request; - if (WARN_ON(request == NULL)) - return -ENOMEM; + if (i915.enable_execlists) { + ret = logical_ring_flush_all_caches(ringbuf); + if (ret) + return ret; + } else { + ret = intel_ring_flush_all_caches(ring); + if (ret) + return ret; + } /* Record the position of the start of the request so that * should we detect the updated seqno part-way through the * GPU processing the request, we never over-estimate the * position of the head. */ - request_ring_position = intel_ring_get_tail(ring->buffer); + request_ring_position = intel_ring_get_tail(ringbuf); - ret = ring->add_request(ring); - if (ret) - return ret; + if (i915.enable_execlists) { + ret = ring->emit_request(ringbuf); + if (ret) + return ret; + } else { + ret = ring->add_request(ring); + if (ret) + return ret; + } request->seqno = intel_ring_get_seqno(ring); request->ring = ring; @@ -2354,12 +2373,14 @@ int __i915_add_request(struct intel_engine_cs *ring, */ request->batch_obj = obj; - /* Hold a reference to the current context so that we can inspect - * it later in case a hangcheck error event fires. - */ - request->ctx = ring->last_context; - if (request->ctx) - i915_gem_context_reference(request->ctx); + if (!i915.enable_execlists) { + /* Hold a reference to the current context so that we can inspect + * it later in case a hangcheck error event fires. 
+ */ + request->ctx = ring->last_context; + if (request->ctx) + i915_gem_context_reference(request->ctx); + } request->emitted_jiffies = jiffies; list_add_tail(&request->list, &ring->request_list); @@ -2614,6 +2635,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) while (!list_empty(&ring->request_list)) { struct drm_i915_gem_request *request; + struct intel_ringbuffer *ringbuf; request = list_first_entry(&ring->request_list, struct drm_i915_gem_request, @@ -2623,12 +2645,24 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) break; trace_i915_gem_request_retire(ring, request->seqno); + + /* This is one of the few common intersection points + * between legacy ringbuffer submission and execlists: + * we need to tell them apart in order to find the correct + * ringbuffer to which the request belongs to. + */ + if (i915.enable_execlists) { + struct intel_context *ctx = request->ctx; + ringbuf = ctx->engine[ring->id].ringbuf; + } else + ringbuf = ring->buffer; + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position * of the GPU head. 
*/ - ring->buffer->last_retired_head = request->tail; + ringbuf->last_retired_head = request->tail; i915_gem_free_request(request); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c2352d1b23fa..cd6ddd80e54c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -252,6 +252,22 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING)); } +int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) +{ + struct intel_engine_cs *ring = ringbuf->ring; + int ret; + + if (!ring->gpu_caches_dirty) + return 0; + + ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS); + if (ret) + return ret; + + ring->gpu_caches_dirty = false; + return 0; +} + void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) { intel_logical_ring_advance(ringbuf); @@ -262,7 +278,8 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) /* TODO: how to submit a context to the ELSP is not here yet */ } -static int logical_ring_alloc_seqno(struct intel_engine_cs *ring) +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, + struct intel_context *ctx) { if (ring->outstanding_lazy_seqno) return 0; @@ -274,6 +291,13 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring) if (request == NULL) return -ENOMEM; + /* Hold a reference to the context this request belongs to + * (we will need it when the time comes to emit/retire the + * request). 
+ */ + request->ctx = ctx; + i915_gem_context_reference(request->ctx); + ring->preallocated_lazy_request = request; } @@ -312,8 +336,6 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, if (ret) return ret; - /* TODO: make sure we update the right ringbuffer's last_retired_head - * when retiring requests */ i915_gem_retire_requests_ring(ring); ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; @@ -433,7 +455,7 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) return ret; /* Preallocate the olr before touching the ring */ - ret = logical_ring_alloc_seqno(ring); + ret = logical_ring_alloc_seqno(ring, ringbuf->FIXME_lrc_ctx); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 4e032875c1fd..460e1af15600 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -29,6 +29,7 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring); void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int intel_logical_rings_init(struct drm_device *dev); +int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf); void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) { -- GitLab From 84b790f80e5153d8d54074aa4eae49ff3070f2f1 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 24 Jul 2014 17:04:36 +0100 Subject: [PATCH 0164/1868] drm/i915/bdw: Implement context switching (somewhat) A context switch occurs by submitting a context descriptor to the ExecList Submission Port. Given that we can now initialize a context, it's possible to begin implementing the context switch by creating the descriptor and submitting it to ELSP (actually two, since the ELSP has two ports). The context object must be mapped in the GGTT, which means it must exist in the 0-4GB graphics VA range. 
Signed-off-by: Ben Widawsky v2: This code has changed quite a lot in various rebases. Of particular importance is that now we use the globally unique Submission ID to send to the hardware. Also, context pages are now pinned unconditionally to GGTT, so there is no need to bind them. v3: Use LRCA[31:12] as hwCtxId[19:0]. This guarantees that the HW context ID we submit to the ELSP is globally unique and != 0 (Bspec requirements of the software use-only bits of the Context ID in the Context Descriptor Format) without the hassle of the previous submission Id construction. Also, re-add the ELSP posting read (it was dropped somewhere during the rebases). v4: - Squash with "drm/i915/bdw: Add forcewake lock around ELSP writes" (BSPEC says: "SW must set Force Wakeup bit to prevent GT from entering C6 while ELSP writes are in progress") as noted by Thomas Daniel (thomas.daniel@intel.com). - Rename functions and use an execlists/intel_execlists_ namespace. - The BUG_ON only checked that the LRCA was <32 bits, but it didn't make sure that it was properly aligned. Spotted by Alistair Mcaulay. v5: - Improved source code comments as suggested by Chris Wilson. - No need to abstract submit_ctx away, as pointed out by Brad Volkin. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch. Sigh.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 116 ++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_lrc.h | 1 + 2 files changed, 115 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index cd6ddd80e54c..aa81fd41b9c1 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -47,6 +47,7 @@ #define GEN8_LR_CONTEXT_ALIGN 4096 #define RING_ELSP(ring) ((ring)->mmio_base+0x230) +#define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) #define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) #define CTX_LRI_HEADER_0 0x01 @@ -78,6 +79,26 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 +#define GEN8_CTX_VALID (1<<0) +#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) +#define GEN8_CTX_FORCE_RESTORE (1<<2) +#define GEN8_CTX_L3LLC_COHERENT (1<<5) +#define GEN8_CTX_PRIVILEGE (1<<8) +enum { + ADVANCED_CONTEXT = 0, + LEGACY_CONTEXT, + ADVANCED_AD_CONTEXT, + LEGACY_64B_CONTEXT +}; +#define GEN8_CTX_MODE_SHIFT 3 +enum { + FAULT_AND_HANG = 0, + FAULT_AND_HALT, /* Debug only */ + FAULT_AND_STREAM, + FAULT_AND_CONTINUE /* Unsupported */ +}; +#define GEN8_CTX_ID_SHIFT 32 + int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { WARN_ON(i915.enable_ppgtt == -1); @@ -92,6 +113,93 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) +{ + u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj); + + /* LRCA is required to be 4K aligned so the more significant 20 bits + * are globally unique */ + return lrca >> 12; +} + +static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj) +{ + uint64_t desc; + uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj); + BUG_ON(lrca & 0xFFFFFFFF00000FFFULL); + + desc = GEN8_CTX_VALID; + desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT; + desc |= GEN8_CTX_L3LLC_COHERENT; + desc 
|= GEN8_CTX_PRIVILEGE; + desc |= lrca; + desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT; + + /* TODO: WaDisableLiteRestore when we start using semaphore + * signalling between Command Streamers */ + /* desc |= GEN8_CTX_FORCE_RESTORE; */ + + return desc; +} + +static void execlists_elsp_write(struct intel_engine_cs *ring, + struct drm_i915_gem_object *ctx_obj0, + struct drm_i915_gem_object *ctx_obj1) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + uint64_t temp = 0; + uint32_t desc[4]; + + /* XXX: You must always write both descriptors in the order below. */ + if (ctx_obj1) + temp = execlists_ctx_descriptor(ctx_obj1); + else + temp = 0; + desc[1] = (u32)(temp >> 32); + desc[0] = (u32)temp; + + temp = execlists_ctx_descriptor(ctx_obj0); + desc[3] = (u32)(temp >> 32); + desc[2] = (u32)temp; + + /* Set Force Wakeup bit to prevent GT from entering C6 while + * ELSP writes are in progress */ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + + I915_WRITE(RING_ELSP(ring), desc[1]); + I915_WRITE(RING_ELSP(ring), desc[0]); + I915_WRITE(RING_ELSP(ring), desc[3]); + /* The context is automatically loaded after the following */ + I915_WRITE(RING_ELSP(ring), desc[2]); + + /* ELSP is a wo register, so use another nearby reg for posting instead */ + POSTING_READ(RING_EXECLIST_STATUS(ring)); + + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); +} + +static int execlists_submit_context(struct intel_engine_cs *ring, + struct intel_context *to0, u32 tail0, + struct intel_context *to1, u32 tail1) +{ + struct drm_i915_gem_object *ctx_obj0; + struct drm_i915_gem_object *ctx_obj1 = NULL; + + ctx_obj0 = to0->engine[ring->id].state; + BUG_ON(!ctx_obj0); + BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0)); + + if (to1) { + ctx_obj1 = to1->engine[ring->id].state; + BUG_ON(!ctx_obj1); + BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1)); + } + + execlists_elsp_write(ring, ctx_obj0, ctx_obj1); + + return 0; +} + static int logical_ring_invalidate_all_caches(struct 
intel_ringbuffer *ringbuf) { struct intel_engine_cs *ring = ringbuf->ring; @@ -270,12 +378,16 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) { + struct intel_engine_cs *ring = ringbuf->ring; + struct intel_context *ctx = ringbuf->FIXME_lrc_ctx; + intel_logical_ring_advance(ringbuf); - if (intel_ring_stopped(ringbuf->ring)) + if (intel_ring_stopped(ring)) return; - /* TODO: how to submit a context to the ELSP is not here yet */ + /* FIXME: too cheeky, we don't even check if the ELSP is ready */ + execlists_submit_context(ring, ctx, ringbuf->tail, NULL, 0); } static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 460e1af15600..69605b158235 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -57,5 +57,6 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct list_head *vmas, struct drm_i915_gem_object *batch_obj, u64 exec_start, u32 flags); +u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj); #endif /* _INTEL_LRC_H_ */ -- GitLab From ae1250b9da308acd16554365d125b4afb795b825 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:37 +0100 Subject: [PATCH 0165/1868] drm/i915/bdw: Write the tail pointer, LRC style Each logical ring context has the tail pointer in the context object, so update it before submission. v2: New namespace. 
Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index aa81fd41b9c1..26bc063f137b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -178,6 +178,21 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } +static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail) +{ + struct page *page; + uint32_t *reg_state; + + page = i915_gem_object_get_page(ctx_obj, 1); + reg_state = kmap_atomic(page); + + reg_state[CTX_RING_TAIL+1] = tail; + + kunmap_atomic(reg_state); + + return 0; +} + static int execlists_submit_context(struct intel_engine_cs *ring, struct intel_context *to0, u32 tail0, struct intel_context *to1, u32 tail1) @@ -189,10 +204,14 @@ static int execlists_submit_context(struct intel_engine_cs *ring, BUG_ON(!ctx_obj0); BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0)); + execlists_ctx_write_tail(ctx_obj0, tail0); + if (to1) { ctx_obj1 = to1->engine[ring->id].state; BUG_ON(!ctx_obj1); BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1)); + + execlists_ctx_write_tail(ctx_obj1, tail1); } execlists_elsp_write(ring, ctx_obj0, ctx_obj1); -- GitLab From acdd884a2e1b873995c120d5eabd8cab77f48f20 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Thu, 24 Jul 2014 17:04:38 +0100 Subject: [PATCH 0166/1868] drm/i915/bdw: Two-stage execlist submit process Context switch (and execlist submission) should happen only when other contexts are not active, otherwise pre-emption occurs. To assure this, we place context switch requests in a queue and those request are later consumed when the right context switch interrupt is received (still TODO). v2: Use a spinlock, do not remove the requests on unqueue (wait for context switch completion). 
Signed-off-by: Thomas Daniel v3: Several rebases and code changes. Use unique ID. v4: - Move the queue/lock init to the late ring initialization. - Damien's kmalloc review comments: check return, use sizeof(*req), do not cast. v5: - Do not reuse drm_i915_gem_request. Instead, create our own. - New namespace. Signed-off-by: Michel Thierry (v1) Signed-off-by: Oscar Mateo (v2-v5) Reviewed-by: Damien Lespiau [davnet: Checkpatch + wash-up s/BUG_ON/WARN_ON/.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 72 +++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_lrc.h | 8 +++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 26bc063f137b..e81f5f6c49b9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -126,7 +126,8 @@ static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj) { uint64_t desc; uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj); - BUG_ON(lrca & 0xFFFFFFFF00000FFFULL); + + WARN_ON(lrca & 0xFFFFFFFF00000FFFULL); desc = GEN8_CTX_VALID; desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT; @@ -202,14 +203,14 @@ static int execlists_submit_context(struct intel_engine_cs *ring, ctx_obj0 = to0->engine[ring->id].state; BUG_ON(!ctx_obj0); - BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0)); + WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0)); execlists_ctx_write_tail(ctx_obj0, tail0); if (to1) { ctx_obj1 = to1->engine[ring->id].state; BUG_ON(!ctx_obj1); - BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1)); + WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1)); execlists_ctx_write_tail(ctx_obj1, tail1); } @@ -219,6 +220,65 @@ static int execlists_submit_context(struct intel_engine_cs *ring, return 0; } +static void execlists_context_unqueue(struct intel_engine_cs *ring) +{ + struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL; + struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL; + + if 
(list_empty(&ring->execlist_queue)) + return; + + /* Try to read in pairs */ + list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue, + execlist_link) { + if (!req0) { + req0 = cursor; + } else if (req0->ctx == cursor->ctx) { + /* Same ctx: ignore first request, as second request + * will update tail past first request's workload */ + list_del(&req0->execlist_link); + i915_gem_context_unreference(req0->ctx); + kfree(req0); + req0 = cursor; + } else { + req1 = cursor; + break; + } + } + + WARN_ON(execlists_submit_context(ring, req0->ctx, req0->tail, + req1 ? req1->ctx : NULL, + req1 ? req1->tail : 0)); +} + +static int execlists_context_queue(struct intel_engine_cs *ring, + struct intel_context *to, + u32 tail) +{ + struct intel_ctx_submit_request *req = NULL; + unsigned long flags; + bool was_empty; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (req == NULL) + return -ENOMEM; + req->ctx = to; + i915_gem_context_reference(req->ctx); + req->ring = ring; + req->tail = tail; + + spin_lock_irqsave(&ring->execlist_lock, flags); + + was_empty = list_empty(&ring->execlist_queue); + list_add_tail(&req->execlist_link, &ring->execlist_queue); + if (was_empty) + execlists_context_unqueue(ring); + + spin_unlock_irqrestore(&ring->execlist_lock, flags); + + return 0; +} + static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) { struct intel_engine_cs *ring = ringbuf->ring; @@ -405,8 +465,7 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) if (intel_ring_stopped(ring)) return; - /* FIXME: too cheeky, we don't even check if the ELSP is ready */ - execlists_submit_context(ring, ctx, ringbuf->tail, NULL, 0); + execlists_context_queue(ring, ctx, ringbuf->tail); } static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, @@ -846,6 +905,9 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin INIT_LIST_HEAD(&ring->request_list); init_waitqueue_head(&ring->irq_queue); + 
INIT_LIST_HEAD(&ring->execlist_queue); + spin_lock_init(&ring->execlist_lock); + ret = intel_lr_context_deferred_create(dctx, ring); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 69605b158235..3c389b3a2b75 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -59,4 +59,12 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, u64 exec_start, u32 flags); u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj); +struct intel_ctx_submit_request { + struct intel_context *ctx; + struct intel_engine_cs *ring; + u32 tail; + + struct list_head execlist_link; +}; + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 26785ca72530..670262dabb6c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -231,6 +231,8 @@ struct intel_engine_cs { } semaphore; /* Execlists */ + spinlock_t execlist_lock; + struct list_head execlist_queue; u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ int (*emit_request)(struct intel_ringbuffer *ringbuf); int (*emit_flush)(struct intel_ringbuffer *ringbuf, -- GitLab From e981e7b17f2b41970e7e2367d4225e0bb3310667 Mon Sep 17 00:00:00 2001 From: Thomas Daniel Date: Thu, 24 Jul 2014 17:04:39 +0100 Subject: [PATCH 0167/1868] drm/i915/bdw: Handle context switch events Handle all context status events in the context status buffer on every context switch interrupt. We only remove work from the execlist queue after a context status buffer reports that it has completed and we only attempt to schedule new contexts on interrupt when a previously submitted context completes (unless no contexts are queued, which means the GPU is free). We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock grabbed, FWIW), because it might sleep, which is not a nice thing to do. 
Instead, do the runtime_pm get/put together with the create/destroy request, and handle the forcewake get/put directly. Signed-off-by: Thomas Daniel v2: Unreferencing the context when we are freeing the request might free the backing bo, which requires the struct_mutex to be grabbed, so defer unreferencing and freeing to a bottom half. v3: - Ack the interrupt inmediately, before trying to handle it (fix for missing interrupts by Bob Beckett ). - Update the Context Status Buffer Read Pointer, just in case (spotted by Damien Lespiau). v4: New namespace and multiple rebase changes. v5: Squash with "drm/i915/bdw: Do not call intel_runtime_pm_get() in an interrupt", as suggested by Daniel. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch ...] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 35 +++++-- drivers/gpu/drm/i915/intel_lrc.c | 133 ++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_lrc.h | 3 + drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 4 files changed, 155 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 00957fa0b877..f5d6795887d2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1632,6 +1632,7 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, struct drm_i915_private *dev_priv, u32 master_ctl) { + struct intel_engine_cs *ring; u32 rcs, bcs, vcs; uint32_t tmp = 0; irqreturn_t ret = IRQ_NONE; @@ -1641,14 +1642,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(0), tmp); ret = IRQ_HANDLED; + rcs = tmp >> GEN8_RCS_IRQ_SHIFT; - bcs = tmp >> GEN8_BCS_IRQ_SHIFT; + ring = &dev_priv->ring[RCS]; if (rcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[RCS]); + notify_ring(dev, ring); + if (rcs & GT_CONTEXT_SWITCH_INTERRUPT) + intel_execlists_handle_ctx_events(ring); + + bcs = tmp >> GEN8_BCS_IRQ_SHIFT; + ring = &dev_priv->ring[BCS]; if (bcs & 
GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[BCS]); - if ((rcs | bcs) & GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER("TODO: Context switch\n"); + notify_ring(dev, ring); + if (bcs & GT_CONTEXT_SWITCH_INTERRUPT) + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR("The master control interrupt lied (GT0)!\n"); } @@ -1658,16 +1665,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(1), tmp); ret = IRQ_HANDLED; + vcs = tmp >> GEN8_VCS1_IRQ_SHIFT; + ring = &dev_priv->ring[VCS]; if (vcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[VCS]); + notify_ring(dev, ring); if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER("TODO: Context switch\n"); + intel_execlists_handle_ctx_events(ring); + vcs = tmp >> GEN8_VCS2_IRQ_SHIFT; + ring = &dev_priv->ring[VCS2]; if (vcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[VCS2]); + notify_ring(dev, ring); if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER("TODO: Context switch\n"); + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR("The master control interrupt lied (GT1)!\n"); } @@ -1688,11 +1699,13 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(3), tmp); ret = IRQ_HANDLED; + vcs = tmp >> GEN8_VECS_IRQ_SHIFT; + ring = &dev_priv->ring[VECS]; if (vcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[VECS]); + notify_ring(dev, ring); if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER("TODO: Context switch\n"); + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR("The master control interrupt lied (GT3)!\n"); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e81f5f6c49b9..22f6a7c0cb18 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -49,6 +49,22 @@ #define RING_ELSP(ring) ((ring)->mmio_base+0x230) #define RING_EXECLIST_STATUS(ring) 
((ring)->mmio_base+0x234) #define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) +#define RING_CONTEXT_STATUS_BUF(ring) ((ring)->mmio_base+0x370) +#define RING_CONTEXT_STATUS_PTR(ring) ((ring)->mmio_base+0x3a0) + +#define RING_EXECLIST_QFULL (1 << 0x2) +#define RING_EXECLIST1_VALID (1 << 0x3) +#define RING_EXECLIST0_VALID (1 << 0x4) +#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE) +#define RING_EXECLIST1_ACTIVE (1 << 0x11) +#define RING_EXECLIST0_ACTIVE (1 << 0x12) + +#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0) +#define GEN8_CTX_STATUS_PREEMPTED (1 << 1) +#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2) +#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3) +#define GEN8_CTX_STATUS_COMPLETE (1 << 4) +#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) #define CTX_LRI_HEADER_0 0x01 #define CTX_CONTEXT_CONTROL 0x02 @@ -150,6 +166,7 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, struct drm_i915_private *dev_priv = ring->dev->dev_private; uint64_t temp = 0; uint32_t desc[4]; + unsigned long flags; /* XXX: You must always write both descriptors in the order below. */ if (ctx_obj1) @@ -163,9 +180,17 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, desc[3] = (u32)(temp >> 32); desc[2] = (u32)temp; - /* Set Force Wakeup bit to prevent GT from entering C6 while - * ELSP writes are in progress */ - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + /* Set Force Wakeup bit to prevent GT from entering C6 while ELSP writes + * are in progress. + * + * The other problem is that we can't just call gen6_gt_force_wake_get() + * because that function calls intel_runtime_pm_get(), which might sleep. + * Instead, we do the runtime_pm_get/put when creating/destroying requests. 
+ */ + spin_lock_irqsave(&dev_priv->uncore.lock, flags); + if (dev_priv->uncore.forcewake_count++ == 0) + dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL); + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); I915_WRITE(RING_ELSP(ring), desc[1]); I915_WRITE(RING_ELSP(ring), desc[0]); @@ -176,7 +201,11 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, /* ELSP is a wo register, so use another nearby reg for posting instead */ POSTING_READ(RING_EXECLIST_STATUS(ring)); - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + /* Release Force Wakeup (see the big comment above). */ + spin_lock_irqsave(&dev_priv->uncore.lock, flags); + if (--dev_priv->uncore.forcewake_count == 0) + dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL); + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); } static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail) @@ -224,6 +253,9 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) { struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL; struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + + assert_spin_locked(&ring->execlist_lock); if (list_empty(&ring->execlist_queue)) return; @@ -237,8 +269,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) /* Same ctx: ignore first request, as second request * will update tail past first request's workload */ list_del(&req0->execlist_link); - i915_gem_context_unreference(req0->ctx); - kfree(req0); + queue_work(dev_priv->wq, &req0->work); req0 = cursor; } else { req1 = cursor; @@ -251,11 +282,97 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) req1 ? 
req1->tail : 0)); } +static bool execlists_check_remove_request(struct intel_engine_cs *ring, + u32 request_id) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct intel_ctx_submit_request *head_req; + + assert_spin_locked(&ring->execlist_lock); + + head_req = list_first_entry_or_null(&ring->execlist_queue, + struct intel_ctx_submit_request, + execlist_link); + + if (head_req != NULL) { + struct drm_i915_gem_object *ctx_obj = + head_req->ctx->engine[ring->id].state; + if (intel_execlists_ctx_id(ctx_obj) == request_id) { + list_del(&head_req->execlist_link); + queue_work(dev_priv->wq, &head_req->work); + return true; + } + } + + return false; +} + +void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + u32 status_pointer; + u8 read_pointer; + u8 write_pointer; + u32 status; + u32 status_id; + u32 submit_contexts = 0; + + status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring)); + + read_pointer = ring->next_context_status_buffer; + write_pointer = status_pointer & 0x07; + if (read_pointer > write_pointer) + write_pointer += 6; + + spin_lock(&ring->execlist_lock); + + while (read_pointer < write_pointer) { + read_pointer++; + status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + + (read_pointer % 6) * 8); + status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + + (read_pointer % 6) * 8 + 4); + + if (status & GEN8_CTX_STATUS_COMPLETE) { + if (execlists_check_remove_request(ring, status_id)) + submit_contexts++; + } + } + + if (submit_contexts != 0) + execlists_context_unqueue(ring); + + spin_unlock(&ring->execlist_lock); + + WARN(submit_contexts > 2, "More than two context complete events?\n"); + ring->next_context_status_buffer = write_pointer % 6; + + I915_WRITE(RING_CONTEXT_STATUS_PTR(ring), + ((u32)ring->next_context_status_buffer & 0x07) << 8); +} + +static void execlists_free_request_task(struct work_struct *work) +{ + struct intel_ctx_submit_request *req = + 
container_of(work, struct intel_ctx_submit_request, work); + struct drm_device *dev = req->ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + intel_runtime_pm_put(dev_priv); + + mutex_lock(&dev->struct_mutex); + i915_gem_context_unreference(req->ctx); + mutex_unlock(&dev->struct_mutex); + + kfree(req); +} + static int execlists_context_queue(struct intel_engine_cs *ring, struct intel_context *to, u32 tail) { struct intel_ctx_submit_request *req = NULL; + struct drm_i915_private *dev_priv = ring->dev->dev_private; unsigned long flags; bool was_empty; @@ -266,6 +383,9 @@ static int execlists_context_queue(struct intel_engine_cs *ring, i915_gem_context_reference(req->ctx); req->ring = ring; req->tail = tail; + INIT_WORK(&req->work, execlists_free_request_task); + + intel_runtime_pm_get(dev_priv); spin_lock_irqsave(&ring->execlist_lock, flags); @@ -907,6 +1027,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin INIT_LIST_HEAD(&ring->execlist_queue); spin_lock_init(&ring->execlist_lock); + ring->next_context_status_buffer = 0; ret = intel_lr_context_deferred_create(dctx, ring); if (ret) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 3c389b3a2b75..a3f135cf439e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -65,6 +65,9 @@ struct intel_ctx_submit_request { u32 tail; struct list_head execlist_link; + struct work_struct work; }; +void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring); + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 670262dabb6c..9cbf7b0ebc99 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -233,6 +233,7 @@ struct intel_engine_cs { /* Execlists */ spinlock_t execlist_lock; struct list_head execlist_queue; + u8 next_context_status_buffer; u32 irq_keep_mask; /* bitmask for 
interrupts that should not be masked */ int (*emit_request)(struct intel_ringbuffer *ringbuf); int (*emit_flush)(struct intel_ringbuffer *ringbuf, -- GitLab From e1fee72c2ea2e9c0c6e6743d32a6832f21337d6c Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:40 +0100 Subject: [PATCH 0168/1868] drm/i915/bdw: Avoid non-lite-restore preemptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the current Execlists feeding mechanism, full preemption is not supported yet: only lite-restores are allowed (that is: the GPU simply samples a new tail pointer for the context currently in execution). But we have identified a scenario in which a full preemption occurs: 1) We submit two contexts for execution (A & B). 2) The GPU finishes with the first one (A), switches to the second one (B) and informs us. 3) We submit B again (hoping to cause a lite restore) together with C, but in the time we spend writing to the ELSP, the GPU finishes B. 4) The GPU starts executing B again (since we told it so). 5) We receive a B finished interrupt and, mistakenly, we submit C (again) and D, causing a full preemption of B. The race is avoided by keeping track of how many times a context has been submitted to the hardware and by better discriminating the received context switch interrupts: in the example, when we have submitted B twice, we won't submit C and D as soon as we receive the notification that B is completed because we were expecting to get a LITE_RESTORE and we didn't, so we know a second completion will be received shortly. Without this explicit checking, somehow, the batch buffer execution order gets messed with. This can be verified with the IGT test I sent together with the series. I don't know the exact mechanism by which the pre-emption messes with the execution order but, since other people are working on the Scheduler + Preemption on Execlists, I didn't try to fix it. 
In this series, only Lite Restores are supported (other kinds of preemption WARN). v2: elsp_submitted belongs in the new intel_ctx_submit_request. Several rebase changes. v3: Clarify how the race is avoided, as requested by Daniel. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Align function parameters ...] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_lrc.h | 2 ++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 22f6a7c0cb18..0f1b6b2b0f0e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -268,6 +268,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) } else if (req0->ctx == cursor->ctx) { /* Same ctx: ignore first request, as second request * will update tail past first request's workload */ + cursor->elsp_submitted = req0->elsp_submitted; list_del(&req0->execlist_link); queue_work(dev_priv->wq, &req0->work); req0 = cursor; @@ -277,9 +278,15 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) } } + WARN_ON(req1 && req1->elsp_submitted); + WARN_ON(execlists_submit_context(ring, req0->ctx, req0->tail, req1 ? req1->ctx : NULL, req1 ? 
req1->tail : 0)); + + req0->elsp_submitted++; + if (req1) + req1->elsp_submitted++; } static bool execlists_check_remove_request(struct intel_engine_cs *ring, @@ -298,9 +305,14 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, struct drm_i915_gem_object *ctx_obj = head_req->ctx->engine[ring->id].state; if (intel_execlists_ctx_id(ctx_obj) == request_id) { - list_del(&head_req->execlist_link); - queue_work(dev_priv->wq, &head_req->work); - return true; + WARN(head_req->elsp_submitted == 0, + "Never submitted head request\n"); + + if (--head_req->elsp_submitted <= 0) { + list_del(&head_req->execlist_link); + queue_work(dev_priv->wq, &head_req->work); + return true; + } } } @@ -333,7 +345,16 @@ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring) status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + (read_pointer % 6) * 8 + 4); - if (status & GEN8_CTX_STATUS_COMPLETE) { + if (status & GEN8_CTX_STATUS_PREEMPTED) { + if (status & GEN8_CTX_STATUS_LITE_RESTORE) { + if (execlists_check_remove_request(ring, status_id)) + WARN(1, "Lite Restored request removed from queue\n"); + } else + WARN(1, "Preemption without Lite Restore\n"); + } + + if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) || + (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) { if (execlists_check_remove_request(ring, status_id)) submit_contexts++; } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index a3f135cf439e..331c6c2ba376 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -66,6 +66,8 @@ struct intel_ctx_submit_request { struct list_head execlist_link; struct work_struct work; + + int elsp_submitted; }; void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring); -- GitLab From f1ad5a1fd4127b3a5e21b8f5ef7f1921a5d3063e Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:41 +0100 Subject: [PATCH 0169/1868] drm/i915/bdw: Help out the ctx switch interrupt handler If we receive a 
storm of requests for the same context (see gem_storedw_loop_*) we might end up iterating over too many elements in interrupt time, looking for contexts to squash together. Instead, share the burden by giving more intelligence to the queue function. At most, the interrupt will iterate over three elements. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0f1b6b2b0f0e..6f6c5a931faf 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -392,10 +392,10 @@ static int execlists_context_queue(struct intel_engine_cs *ring, struct intel_context *to, u32 tail) { - struct intel_ctx_submit_request *req = NULL; + struct intel_ctx_submit_request *req = NULL, *cursor; struct drm_i915_private *dev_priv = ring->dev->dev_private; unsigned long flags; - bool was_empty; + int num_elements = 0; req = kzalloc(sizeof(*req), GFP_KERNEL); if (req == NULL) @@ -410,9 +410,27 @@ static int execlists_context_queue(struct intel_engine_cs *ring, spin_lock_irqsave(&ring->execlist_lock, flags); - was_empty = list_empty(&ring->execlist_queue); + list_for_each_entry(cursor, &ring->execlist_queue, execlist_link) + if (++num_elements > 2) + break; + + if (num_elements > 2) { + struct intel_ctx_submit_request *tail_req; + + tail_req = list_last_entry(&ring->execlist_queue, + struct intel_ctx_submit_request, + execlist_link); + + if (to == tail_req->ctx) { + WARN(tail_req->elsp_submitted != 0, + "More than 2 already-submitted reqs queued\n"); + list_del(&tail_req->execlist_link); + queue_work(dev_priv->wq, &tail_req->work); + } + } + list_add_tail(&req->execlist_link, &ring->execlist_queue); - if (was_empty) + if (num_elements == 0) execlists_context_unqueue(ring); spin_unlock_irqrestore(&ring->execlist_lock, 
flags); -- GitLab From 4ed91096881449012b14b1e879f40b4a37533e0e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 8 Aug 2014 20:27:01 +0200 Subject: [PATCH 0170/1868] drm/i915: Track cursor changes as frontbuffer tracking flushes We treat other plane updates in the same fashion. Spotted because Rodrigo kept reporting a bug in the PSR code where the frontbuffer was eternally stuck with a dirty cursor bit set. The psr testcase should have caught this, but that i-g-t is kaputt. Rodrigo is signed up to fix that. Cc: Rodrigo Vivi Tested-by-and-Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fd15601f6360..b2e4eac7b70b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11782,6 +11782,10 @@ intel_cursor_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, return intel_crtc_cursor_set_obj(crtc, obj, crtc_w, crtc_h); } else { intel_crtc_update_cursor(crtc, visible); + + intel_frontbuffer_flip(crtc->dev, + INTEL_FRONTBUFFER_CURSOR(intel_crtc->pipe)); + return 0; } } -- GitLab From ba07975f0fe5bf95107d71d0df0405c16f5c3266 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:30 -0700 Subject: [PATCH 0171/1868] gpu: ipu-v3: Add functions to set CSI/IC source muxes Adds two new functions, ipu_set_csi_src_mux() and ipu_set_ic_src_mux(), that select the inputs to the CSI and IC respectively. Both muxes are programmed in the IPU_CONF register. 
Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 51 +++++++++++++++++++++++++++++++++ include/video/imx-ipu-v3.h | 6 ++++ 2 files changed, 57 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 5978e7aab8ed..cae543115856 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -382,6 +382,57 @@ static int ipu_memory_reset(struct ipu_soc *ipu) return 0; } +/* + * Set the source mux for the given CSI. Selects either parallel or + * MIPI CSI2 sources. + */ +void ipu_set_csi_src_mux(struct ipu_soc *ipu, int csi_id, bool mipi_csi2) +{ + unsigned long flags; + u32 val, mask; + + mask = (csi_id == 1) ? IPU_CONF_CSI1_DATA_SOURCE : + IPU_CONF_CSI0_DATA_SOURCE; + + spin_lock_irqsave(&ipu->lock, flags); + + val = ipu_cm_read(ipu, IPU_CONF); + if (mipi_csi2) + val |= mask; + else + val &= ~mask; + ipu_cm_write(ipu, val, IPU_CONF); + + spin_unlock_irqrestore(&ipu->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_set_csi_src_mux); + +/* + * Set the source mux for the IC. Selects either CSI[01] or the VDI. 
+ */ +void ipu_set_ic_src_mux(struct ipu_soc *ipu, int csi_id, bool vdi) +{ + unsigned long flags; + u32 val; + + spin_lock_irqsave(&ipu->lock, flags); + + val = ipu_cm_read(ipu, IPU_CONF); + if (vdi) { + val |= IPU_CONF_IC_INPUT; + } else { + val &= ~IPU_CONF_IC_INPUT; + if (csi_id == 1) + val |= IPU_CONF_CSI_SEL; + else + val &= ~IPU_CONF_CSI_SEL; + } + ipu_cm_write(ipu, val, IPU_CONF); + + spin_unlock_irqrestore(&ipu->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_set_ic_src_mux); + struct ipu_devtype { const char *name; unsigned long cm_ofs; diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index ef64b66b18df..f80fe13b0d4d 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -92,6 +92,12 @@ int ipu_idmac_channel_irq(struct ipu_soc *ipu, struct ipuv3_channel *channel, #define IPU_IRQ_VSYNC_PRE_0 (448 + 14) #define IPU_IRQ_VSYNC_PRE_1 (448 + 15) +/* + * IPU Common functions + */ +void ipu_set_csi_src_mux(struct ipu_soc *ipu, int csi_id, bool mipi_csi2); +void ipu_set_ic_src_mux(struct ipu_soc *ipu, int csi_id, bool vdi); + /* * IPU Image DMA Controller (idmac) functions */ -- GitLab From c2d670fd3b16304124162bef99313eaa289f2bc3 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:31 -0700 Subject: [PATCH 0172/1868] gpu: ipu-v3: Rename and add IDMAC channels Rename the ENC/VF/PP rotation channel names, to be more consistent with the convention that *_MEM is write-to-memory channels and MEM_* is read-from-memory channels. Also add the channels whose source and destination is the IC.
Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-prv.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h index 0a7b2adaba39..1a5c55c05fe8 100644 --- a/drivers/gpu/ipu-v3/ipu-prv.h +++ b/drivers/gpu/ipu-v3/ipu-prv.h @@ -28,17 +28,25 @@ struct ipu_soc; #define IPUV3_CHANNEL_CSI1 1 #define IPUV3_CHANNEL_CSI2 2 #define IPUV3_CHANNEL_CSI3 3 +#define IPUV3_CHANNEL_VDI_MEM_IC_VF 5 +#define IPUV3_CHANNEL_MEM_IC_PP 11 +#define IPUV3_CHANNEL_MEM_IC_PRP_VF 12 +#define IPUV3_CHANNEL_G_MEM_IC_PRP_VF 14 +#define IPUV3_CHANNEL_G_MEM_IC_PP 15 +#define IPUV3_CHANNEL_IC_PRP_ENC_MEM 20 +#define IPUV3_CHANNEL_IC_PRP_VF_MEM 21 +#define IPUV3_CHANNEL_IC_PP_MEM 22 #define IPUV3_CHANNEL_MEM_BG_SYNC 23 #define IPUV3_CHANNEL_MEM_FG_SYNC 27 #define IPUV3_CHANNEL_MEM_DC_SYNC 28 #define IPUV3_CHANNEL_MEM_FG_SYNC_ALPHA 31 #define IPUV3_CHANNEL_MEM_DC_ASYNC 41 -#define IPUV3_CHANNEL_ROT_ENC_MEM 45 -#define IPUV3_CHANNEL_ROT_VF_MEM 46 -#define IPUV3_CHANNEL_ROT_PP_MEM 47 -#define IPUV3_CHANNEL_ROT_ENC_MEM_OUT 48 -#define IPUV3_CHANNEL_ROT_VF_MEM_OUT 49 -#define IPUV3_CHANNEL_ROT_PP_MEM_OUT 50 +#define IPUV3_CHANNEL_MEM_ROT_ENC 45 +#define IPUV3_CHANNEL_MEM_ROT_VF 46 +#define IPUV3_CHANNEL_MEM_ROT_PP 47 +#define IPUV3_CHANNEL_ROT_ENC_MEM 48 +#define IPUV3_CHANNEL_ROT_VF_MEM 49 +#define IPUV3_CHANNEL_ROT_PP_MEM 50 #define IPUV3_CHANNEL_MEM_BG_SYNC_ALPHA 51 #define IPU_MCU_T_DEFAULT 8 -- GitLab From b22ae40ef2e7847ddbd802d1a887188e113675f3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 8 Aug 2014 17:23:07 +0200 Subject: [PATCH 0173/1868] Documentation: kbuild: Remove obsolete include/asm symlink step As of commit f7f16b7799ed68654850ab340ef812895aebcf4c ("kbuild: drop include/asm"), the include/asm symlink is no longer created. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Michal Marek --- Documentation/kbuild/makefiles.txt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 764f5991a3fc..a445e1c8828e 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -818,17 +818,16 @@ a few targets. When kbuild executes, the following steps are followed (roughly): 1) Configuration of the kernel => produce .config 2) Store kernel version in include/linux/version.h -3) Symlink include/asm to include/asm-$(ARCH) -4) Updating all other prerequisites to the target prepare: +3) Updating all other prerequisites to the target prepare: - Additional prerequisites are specified in arch/$(ARCH)/Makefile -5) Recursively descend down in all directories listed in +4) Recursively descend down in all directories listed in init-* core* drivers-* net-* libs-* and build all targets. - The values of the above variables are expanded in arch/$(ARCH)/Makefile. -6) All object files are then linked and the resulting file vmlinux is +5) All object files are then linked and the resulting file vmlinux is located at the root of the obj tree. The very first objects linked are listed in head-y, assigned by arch/$(ARCH)/Makefile. -7) Finally, the architecture-specific part does any required post processing +6) Finally, the architecture-specific part does any required post processing and builds the final bootimage. 
- This includes building boot records - Preparing initrd images and the like -- GitLab From ef80f0a1e033bcab17257e2155a3c9263a0919c1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 8 Aug 2014 17:23:08 +0200 Subject: [PATCH 0174/1868] Documentation: kbuild: Improve if_changed documentation - These days if_changed is used with many more commands than ld, objcopy, and gzip, hence add an ellipsis, - Any target that utilises if_changed must be listed in $(targets), so it needs an assignment to "targets", not "target". Signed-off-by: Geert Uytterhoeven Signed-off-by: Michal Marek --- Documentation/kbuild/makefiles.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index a445e1c8828e..520b2c75bc56 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -1092,7 +1092,7 @@ When kbuild executes, the following steps are followed (roughly): Usage: target: source(s) FORCE - $(call if_changed,ld/objcopy/gzip) + $(call if_changed,ld/objcopy/gzip/...) When the rule is evaluated, it is checked to see if any files need an update, or the command line has changed since the last @@ -1110,7 +1110,7 @@ When kbuild executes, the following steps are followed (roughly): significant; for instance, the below will fail (note the extra space after the comma): target: source(s) FORCE - #WRONG!# $(call if_changed, ld/objcopy/gzip) + #WRONG!# $(call if_changed, ld/objcopy/gzip/...) ld Link target. Often, LDFLAGS_$@ is used to set specific options to ld. @@ -1142,7 +1142,7 @@ When kbuild executes, the following steps are followed (roughly): The ": %: %.o" part of the prerequisite is a shorthand that free us from listing the setup.o and bootsect.o files. 
- Note: It is a common mistake to forget the "target :=" assignment, + Note: It is a common mistake to forget the "targets :=" assignment, resulting in the target file being recompiled for no obvious reason. -- GitLab From cf6c53db9525a7f6c5052ccd84a0638128f14632 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 8 Aug 2014 17:23:09 +0200 Subject: [PATCH 0175/1868] Documentation: kbuild: Remove obsolete dtc_cpp section Commit b40b25fff8205dd18124d8fc87b2c9c57f269b5f ("kbuild: always run gcc -E on *.dts, remove cmd_dtc_cpp") improved the functionality of cmd_dtc_cpp and merged it back into cmd_dtc. Signed-off-by: Geert Uytterhoeven Signed-off-by: Michal Marek --- Documentation/kbuild/makefiles.txt | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 520b2c75bc56..eda00a1073a7 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -1163,29 +1163,6 @@ When kbuild executes, the following steps are followed (roughly): clean-files += *.dtb DTC_FLAGS ?= -p 1024 - dtc_cpp - This is just like dtc as describe above, except that the C pre- - processor is invoked upon the .dtsp file before compiling the result - with dtc. - - In order for build dependencies to work, all files compiled using - dtc_cpp must use the C pre-processor's #include functionality and not - dtc's /include/ functionality. - - Using the C pre-processor allows use of #define to create named - constants. In turn, the #defines will typically appear in a header - file, which may be shared with regular C code. Since the dtc language - represents a data structure rather than code in C syntax, similar - restrictions are placed on a header file included by a device tree - file as for a header file included by an assembly language file. - In particular, the C pre-processor is passed -x assembler-with-cpp, - which sets macro __ASSEMBLY__. __DTS__ is also set. 
These allow header - files to restrict their content to that compatible with device tree - source. - - A central rule exists to create $(obj)/%.dtb from $(src)/%.dtsp; - architecture Makefiles do no need to explicitly write out that rule. - --- 6.8 Custom kbuild commands When kbuild is executing with KBUILD_VERBOSE=0, then only a shorthand -- GitLab From 39fed7015cd9124b5893fce18d33f49db1c48bea Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 8 Aug 2014 17:23:10 +0200 Subject: [PATCH 0176/1868] Documentation: kbuild: Improve grammar - singular versus plural, - "by" versus "of", - missing "if", "it", "the", - consistent use of "xxx-specific" versus "xxx specific". Signed-off-by: Geert Uytterhoeven Acked-by: Randy Dunlap Signed-off-by: Michal Marek --- Documentation/kbuild/makefiles.txt | 56 +++++++++++++++--------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index eda00a1073a7..a311db829e9b 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -174,7 +174,7 @@ more details, with real examples. --- 3.3 Loadable module goals - obj-m - $(obj-m) specify object files which are built as loadable + $(obj-m) specifies object files which are built as loadable kernel modules. A module may be built from one source file or several source @@ -277,7 +277,7 @@ more details, with real examples. down in the ext2 directory. Kbuild only uses this information to decide that it needs to visit the directory, it is the Makefile in the subdirectory that - specifies what is modules and what is built-in. + specifies what is modular and what is built-in. It is good practice to use a CONFIG_ variable when assigning directory names. This allows kbuild to totally skip the directory if the @@ -403,7 +403,7 @@ more details, with real examples. 
echoing information to user in a rule is often a good practice but when execution "make -s" one does not expect to see any output except for warnings/errors. - To support this kbuild define $(kecho) which will echo out the + To support this kbuild defines $(kecho) which will echo out the text following $(kecho) to stdout except if "make -s" is used. Example: @@ -417,7 +417,7 @@ more details, with real examples. The kernel may be built with several different versions of $(CC), each supporting a unique set of features and options. - kbuild provide basic support to check for valid options for $(CC). + kbuild provides basic support to check for valid options for $(CC). $(CC) is usually the gcc compiler, but other alternatives are available. @@ -456,8 +456,8 @@ more details, with real examples. Note: as-instr-option uses KBUILD_AFLAGS for $(AS) options cc-option - cc-option is used to check if $(CC) supports a given option, and not - supported to use an optional second option. + cc-option is used to check if $(CC) supports a given option, and if + not supported to use an optional second option. Example: #arch/x86/Makefile @@ -557,8 +557,8 @@ more details, with real examples. false ; \ fi - In this example for a specific GCC version the build will error out explaining - to the user why it stops. + In this example for a specific GCC version the build will error out + explaining to the user why it stops. cc-cross-prefix cc-cross-prefix is used to check if there exists a $(CC) in path with @@ -656,7 +656,7 @@ Both possibilities are described in the following. In the example above the executable is composed of the C++ file qconf.cc - identified by $(qconf-cxxobjs). - If qconf is composed by a mixture of .c and .cc files, then an + If qconf is composed of a mixture of .c and .cc files, then an additional line can be used to identify this. Example: @@ -733,7 +733,7 @@ Both possibilities are described in the following. 
hostprogs-$(CONFIG_KALLSYMS) += kallsyms Kbuild knows about both 'y' for built-in and 'm' for module. - So if a config symbol evaluate to 'm', kbuild will still build + So if a config symbol evaluates to 'm', kbuild will still build the binary. In other words, Kbuild handles hostprogs-m exactly like hostprogs-y. But only hostprogs-y is recommended to be used when no CONFIG symbols are involved. @@ -754,8 +754,8 @@ Additional files can be specified in kbuild makefiles by use of $(clean-files). #drivers/pci/Makefile clean-files := devlist.h classlist.h -When executing "make clean", the two files "devlist.h classlist.h" will -be deleted. Kbuild will assume files to be in same relative directory as the +When executing "make clean", the two files "devlist.h classlist.h" will be +deleted. Kbuild will assume files to be in the same relative directory as the Makefile except if an absolute path is specified (path starting with '/'). To delete a directory hierarchy use: @@ -786,7 +786,7 @@ is not sufficient this sometimes needs to be explicit. The above assignment instructs kbuild to descend down in the directory compressed/ when "make clean" is executed. -To support the clean infrastructure in the Makefiles that builds the +To support the clean infrastructure in the Makefiles that build the final bootimage there is an optional target named archclean: Example: @@ -926,7 +926,7 @@ When kbuild executes, the following steps are followed (roughly): KBUILD_AFLAGS_MODULE Options for $(AS) when building modules - $(KBUILD_AFLAGS_MODULE) is used to add arch specific options that + $(KBUILD_AFLAGS_MODULE) is used to add arch-specific options that are used for $(AS). From commandline AFLAGS_MODULE shall be used (see kbuild.txt). 
@@ -937,13 +937,13 @@ When kbuild executes, the following steps are followed (roughly): KBUILD_CFLAGS_MODULE Options for $(CC) when building modules - $(KBUILD_CFLAGS_MODULE) is used to add arch specific options that + $(KBUILD_CFLAGS_MODULE) is used to add arch-specific options that are used for $(CC). From commandline CFLAGS_MODULE shall be used (see kbuild.txt). KBUILD_LDFLAGS_MODULE Options for $(LD) when linking modules - $(KBUILD_LDFLAGS_MODULE) is used to add arch specific options + $(KBUILD_LDFLAGS_MODULE) is used to add arch-specific options used when linking modules. This is often a linker script. From commandline LDFLAGS_MODULE shall be used (see kbuild.txt). @@ -1065,7 +1065,7 @@ When kbuild executes, the following steps are followed (roughly): extra-y - extra-y specify additional targets created in the current + extra-y specifies additional targets created in the current directory, in addition to any targets specified by obj-*. Listing all targets in extra-y is required for two purposes: @@ -1141,7 +1141,7 @@ When kbuild executes, the following steps are followed (roughly): 2) delete target during make clean The ": %: %.o" part of the prerequisite is a shorthand that - free us from listing the setup.o and bootsect.o files. + frees us from listing the setup.o and bootsect.o files. Note: It is a common mistake to forget the "targets :=" assignment, resulting in the target file being recompiled for no obvious reason. @@ -1213,11 +1213,11 @@ When kbuild executes, the following steps are followed (roughly): When building the *.lds target, kbuild uses the variables: KBUILD_CPPFLAGS : Set in top-level Makefile cppflags-y : May be set in the kbuild makefile - CPPFLAGS_$(@F) : Target specific flags. + CPPFLAGS_$(@F) : Target-specific flags. Note that the full filename is used in this assignment. - The kbuild infrastructure for *lds file are used in several + The kbuild infrastructure for *lds files is used in several architecture-specific files. 
--- 6.10 Generic header files @@ -1230,11 +1230,11 @@ When kbuild executes, the following steps are followed (roughly): === 7 Kbuild syntax for exported headers -The kernel include a set of headers that is exported to userspace. +The kernel includes a set of headers that is exported to userspace. Many headers can be exported as-is but other headers require a minimal pre-processing before they are ready for user-space. The pre-processing does: -- drop kernel specific annotations +- drop kernel-specific annotations - drop include of compiler.h - drop all sections that are kernel internal (guarded by ifdef __KERNEL__) @@ -1244,7 +1244,7 @@ See subsequent chapter for the syntax of the Kbuild file. --- 7.1 header-y - header-y specify header files to be exported. + header-y specifies header files to be exported. Example: #include/linux/Kbuild @@ -1254,7 +1254,7 @@ See subsequent chapter for the syntax of the Kbuild file. The convention is to list one file per line and preferably in alphabetic order. - header-y also specify which subdirectories to visit. + header-y also specifies which subdirectories to visit. A subdirectory is identified by a trailing '/' which can be seen in the example above for the usb subdirectory. @@ -1272,9 +1272,9 @@ See subsequent chapter for the syntax of the Kbuild file. --- 7.3 destination-y - When an architecture have a set of exported headers that needs to be + When an architecture has a set of exported headers that needs to be exported to a different directory destination-y is used. - destination-y specify the destination directory for all exported + destination-y specifies the destination directory for all exported headers in the file where it is present. Example: @@ -1367,9 +1367,9 @@ The top Makefile exports the following variables: INSTALL_MOD_STRIP - If this variable is specified, will cause modules to be stripped + If this variable is specified, it will cause modules to be stripped after they are installed. 
If INSTALL_MOD_STRIP is '1', then the - default option --strip-debug will be used. Otherwise, + default option --strip-debug will be used. Otherwise, the INSTALL_MOD_STRIP value will be used as the option(s) to the strip command. -- GitLab From c8589d1e9e01debdb4f574afe7c585714353ad79 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 19 Aug 2014 16:34:20 +0900 Subject: [PATCH 0177/1868] kbuild: handle multi-objs dependency appropriately The comment in scripts/Makefile.build says as follows: We would rather have a list of rules like foo.o: $(foo-objs) but that's not so easy, so we rather make all composite objects depend on the set of all their parts This commit makes it possible! For example, assume a Makefile like this obj-m = foo.o bar.o foo-objs := foo1.o foo2.o bar-objs := bar1.o bar2.o Without this patch, foo.o depends on all of foo1.o foo2.o bar1.o bar2.o. It looks funny that foo.o is regenerated when bar1.c is updated. Now we can handle the dependency of foo.o and bar.o separately. 
Signed-off-by: Masahiro Yamada Signed-off-by: Michal Marek --- scripts/Makefile.build | 10 ++++------ scripts/Makefile.lib | 9 +++++++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index bf3e6778cd71..5b09d3637855 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -382,16 +382,14 @@ cmd_link_multi-y = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) $(cmd_secanalys quiet_cmd_link_multi-m = LD [M] $@ cmd_link_multi-m = $(cmd_link_multi-y) -# We would rather have a list of rules like -# foo.o: $(foo-objs) -# but that's not so easy, so we rather make all composite objects depend -# on the set of all their parts -$(multi-used-y) : %.o: $(multi-objs-y) FORCE +$(multi-used-y): FORCE $(call if_changed,link_multi-y) +$(call multi_depend, $(multi-used-y), .o, -objs -y) -$(multi-used-m) : %.o: $(multi-objs-m) FORCE +$(multi-used-m): FORCE $(call if_changed,link_multi-m) @{ echo $(@:.o=.ko); echo $(link_multi_deps); } > $(MODVERDIR)/$(@F:.o=.mod) +$(call multi_depend, $(multi-used-m), .o, -objs -y) targets += $(multi-used-y) $(multi-used-m) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 260bf8acfce9..54be19a0fa51 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -159,6 +159,15 @@ dtc_cpp_flags = -Wp,-MD,$(depfile).pre.tmp -nostdinc \ modname-multi = $(sort $(foreach m,$(multi-used),\ $(if $(filter $(subst $(obj)/,,$*.o), $($(m:.o=-objs)) $($(m:.o=-y))),$(m:.o=)))) +# Useful for describing the dependency of composite objects +# Usage: +# $(call multi_depend, multi_used_targets, suffix_to_remove, suffix_to_add) +define multi_depend +$(foreach m, $(notdir $1), \ + $(eval $(obj)/$m: \ + $(addprefix $(obj)/, $(foreach s, $3, $($(m:%$(strip $2)=%$(s))))))) +endef + ifdef REGENERATE_PARSERS # GPERF -- GitLab From 97e3226e6e984c8cd9bed47010f30827a3ce816a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 19 Aug 2014 16:34:21 +0900 Subject: [PATCH 0178/1868] kbuild: 
handle the dependency of multi-objs hostprogs appropriately Assume we have a Makefile like: hostprogs-y := foo bar foo-objs := foo1.o foo2.o bar-objs := bar1.o bar2.o Without this commit, the host program foo depends on all of foo1.o foo2.o bar1.o bar2.o. This commit allows to handle the dependency of each host program separately. Signed-off-by: Masahiro Yamada Signed-off-by: Michal Marek --- scripts/Makefile.host | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.host b/scripts/Makefile.host index ab5980f91714..133edfae5b8a 100644 --- a/scripts/Makefile.host +++ b/scripts/Makefile.host @@ -96,8 +96,9 @@ quiet_cmd_host-cmulti = HOSTLD $@ cmd_host-cmulti = $(HOSTCC) $(HOSTLDFLAGS) -o $@ \ $(addprefix $(obj)/,$($(@F)-objs)) \ $(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F)) -$(host-cmulti): $(obj)/%: $(host-cobjs) FORCE +$(host-cmulti): FORCE $(call if_changed,host-cmulti) +$(call multi_depend, $(host-cmulti), , -objs) # Create .o file from a single .c file # host-cobjs -> .o @@ -113,8 +114,9 @@ quiet_cmd_host-cxxmulti = HOSTLD $@ $(foreach o,objs cxxobjs,\ $(addprefix $(obj)/,$($(@F)-$(o)))) \ $(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F)) -$(host-cxxmulti): $(obj)/%: $(host-cobjs) $(host-cxxobjs) FORCE +$(host-cxxmulti): FORCE $(call if_changed,host-cxxmulti) +$(call multi_depend, $(host-cxxmulti), , -objs -cxxobjs) # Create .o file from a single .cc (C++) file quiet_cmd_host-cxxobjs = HOSTCXX $@ -- GitLab From 022af62d0190e1e3db63c19aeb5f51ae0612cd71 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 19 Aug 2014 16:34:22 +0900 Subject: [PATCH 0179/1868] kbuild: refactor script/kconfig/Makefile Now it is harmless to add all host programs to hostprogs-y. 
Signed-off-by: Masahiro Yamada Signed-off-by: Michal Marek --- scripts/kconfig/Makefile | 34 +++------------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 9c4d2412fb72..76f6171768e4 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -157,35 +157,7 @@ qconf-cxxobjs := qconf.o qconf-objs := zconf.tab.o gconf-objs := gconf.o zconf.tab.o -hostprogs-y := conf - -ifeq ($(MAKECMDGOALS),nconfig) - hostprogs-y += nconf -endif - -ifeq ($(MAKECMDGOALS),menuconfig) - hostprogs-y += mconf -endif - -ifeq ($(MAKECMDGOALS),update-po-config) - hostprogs-y += kxgettext -endif - -ifeq ($(MAKECMDGOALS),xconfig) - qconf-target := 1 -endif -ifeq ($(MAKECMDGOALS),gconfig) - gconf-target := 1 -endif - - -ifeq ($(qconf-target),1) - hostprogs-y += qconf -endif - -ifeq ($(gconf-target),1) - hostprogs-y += gconf -endif +hostprogs-y := conf nconf mconf kxgettext qconf gconf clean-files := qconf.moc .tmp_qtcheck .tmp_gtkcheck clean-files += zconf.tab.c zconf.lex.c zconf.hash.c gconf.glade.h @@ -224,7 +196,7 @@ HOSTLOADLIBES_nconf = $(shell \ || echo "-lmenu -lpanel -lncurses" ) $(obj)/qconf.o: $(obj)/.tmp_qtcheck -ifeq ($(qconf-target),1) +ifeq ($(MAKECMDGOALS),xconfig) $(obj)/.tmp_qtcheck: $(src)/Makefile -include $(obj)/.tmp_qtcheck @@ -281,7 +253,7 @@ endif $(obj)/gconf.o: $(obj)/.tmp_gtkcheck -ifeq ($(gconf-target),1) +ifeq ($(MAKECMDGOALS),gconfig) -include $(obj)/.tmp_gtkcheck # GTK needs some extra effort, too... -- GitLab From 221ecca6cafefbb5106cfc8bf9f1105233a33745 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 19 Aug 2014 16:34:23 +0900 Subject: [PATCH 0180/1868] kbuild: remove redundant clean-files from scripts/kconfig/Makefile Now mconf, qconf, gconf, nconf are always added to hostprogs-y. Files added to hostprogs-y are removed by "make clean". 
Signed-off-by: Masahiro Yamada Signed-off-by: Michal Marek --- scripts/kconfig/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 76f6171768e4..e7bf38e92007 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -161,7 +161,6 @@ hostprogs-y := conf nconf mconf kxgettext qconf gconf clean-files := qconf.moc .tmp_qtcheck .tmp_gtkcheck clean-files += zconf.tab.c zconf.lex.c zconf.hash.c gconf.glade.h -clean-files += mconf qconf gconf nconf clean-files += config.pot linux.pot # Check that we have the required ncurses stuff installed for lxdialog (menuconfig) -- GitLab From 8bf4abaddd01aa6c9d13804fa05084cb28135a47 Mon Sep 17 00:00:00 2001 From: Dirk Gouders Date: Sat, 16 Aug 2014 07:56:56 +0200 Subject: [PATCH 0181/1868] scripts/tags.sh: Don't specify kind-spec for emacs' ctags/etags Emacs' ctags/etags don't know about kind-spec in --regex and produce warnings: etags: invalid regexp modifier `v', ignoring etags: invalid regexp modifier `/', ignoring Fix it by removing kind-spec for the emacs case. 
Signed-off-by: Dirk Gouders Inspired-by: Masatake YAMATO Tested-by: Masatake YAMATO Signed-off-by: Michal Marek --- scripts/tags.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index cbfd269a6011..727989757d59 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -263,7 +263,7 @@ emacs() --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' \ --regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \ --regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'\ - --regex='/DEFINE_HASHTABLE\((\w*)/\1/v/' + --regex='/DEFINE_HASHTABLE\((\w*)/\1/' all_kconfigs | xargs $1 -a \ --regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/\3/' -- GitLab From a60113d6a7fca3320e84d25db84c3c1a5a02b505 Mon Sep 17 00:00:00 2001 From: Dirk Gouders Date: Sat, 16 Aug 2014 07:56:57 +0200 Subject: [PATCH 0182/1868] scripts/tags.sh: remove *PCGFLAGS regular expressions Commit 0a31bc97c80c3fa8 (mm: memcontrol: rewrite uncharge API) removed the macros {TEST,SET,CLEAR,TESTCLEAR}PCGFLAG. Remove corresponding entries from tags.sh -- in the emacs case they also produced warnings because of unmatched '\('.
Signed-off-by: Dirk Gouders Inspired-by: Masatake YAMATO Signed-off-by: Michal Marek --- scripts/tags.sh | 8 -------- 1 file changed, 8 deletions(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index 727989757d59..0f61bd7ee958 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -193,10 +193,6 @@ exuberant() --regex-c++='/TESTCLEARFLAG_FALSE\(([^,)]*).*/TestClearPage\1/' \ --regex-c++='/__TESTCLEARFLAG_FALSE\(([^,)]*).*/__TestClearPage\1/' \ --regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/' \ - --regex-c++='/TESTPCGFLAG\(([^,)]*).*/PageCgroup\1/' \ - --regex-c++='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/' \ - --regex-c++='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/' \ - --regex-c++='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \ --regex-c='/PCI_OP_READ\((\w*).*[1-4]\)/pci_bus_read_config_\1/' \ --regex-c='/PCI_OP_WRITE\((\w*).*[1-4]\)/pci_bus_write_config_\1/' \ --regex-c='/DEFINE_(MUTEX|SEMAPHORE|SPINLOCK)\((\w*)/\2/v/' \ @@ -256,10 +252,6 @@ emacs() --regex='/__CLEARPAGEFLAG_NOOP(\([^,)]*\).*/__ClearPage\1/' \ --regex='/TESTCLEARFLAG_FALSE(\([^,)]*\).*/TestClearPage\1/' \ --regex='/__TESTCLEARFLAG_FALSE(\([^,)]*\).*/__TestClearPage\1/' \ - --regex='/TESTPCGFLAG\(([^,)]*).*/PageCgroup\1/' \ - --regex='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/' \ - --regex='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/' \ - --regex='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \ --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' \ --regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \ --regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'\ -- GitLab From 8e170655b517ba49bf4d015008474bcc2f425b20 Mon Sep 17 00:00:00 2001 From: Dirk Gouders Date: Sat, 16 Aug 2014 07:56:58 +0200 Subject: [PATCH 0183/1868] scripts/tags.sh: fix DEFINE_HASHTABLE in emacs case The emacs --regex for DEFINE_HASHTABLE produced a warning because of an unmatched '\('. 
Further, the whole entry did not work, because the regex needs to match from the beginning of a line, including keywords like 'static'. Finally, '\w' should not be used, because it stops at underscores which are often part of variable names in C, resulting in wrong entries in the tags file. Signed-off-by: Dirk Gouders Inspired-by: Masatake YAMATO Signed-off-by: Michal Marek --- scripts/tags.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index 0f61bd7ee958..fd651f90a838 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -255,7 +255,7 @@ emacs() --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' \ --regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \ --regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'\ - --regex='/DEFINE_HASHTABLE\((\w*)/\1/' + --regex='/[^#]*DEFINE_HASHTABLE(\([^,)]*\)/\1/' all_kconfigs | xargs $1 -a \ --regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/\3/' -- GitLab From f171abab8f1a75797124be5aae8376e20e4852d9 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 4 Aug 2014 10:06:28 +0530 Subject: [PATCH 0184/1868] iommu/exynos: Fix trivial typos Fixed trivial typos and grammar to improve readability. Changed w/a to workaround. 
Signed-off-by: Sachin Kamat Acked-by: Randy Dunlap Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 51 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index d037e87a1fe5..74233186f6f7 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -32,7 +32,7 @@ typedef u32 sysmmu_iova_t; typedef u32 sysmmu_pte_t; -/* We does not consider super section mapping (16MB) */ +/* We do not consider super section mapping (16MB) */ #define SECT_ORDER 20 #define LPAGE_ORDER 16 #define SPAGE_ORDER 12 @@ -307,7 +307,7 @@ static void show_fault_information(const char *name, static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) { - /* SYSMMU is in blocked when interrupt occurred. */ + /* SYSMMU is in blocked state when interrupt occurred. */ struct sysmmu_drvdata *data = dev_id; enum exynos_sysmmu_inttype itype; sysmmu_iova_t addr = -1; @@ -567,8 +567,8 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, sysmmu_iova_t iova, /* * L2TLB invalidation required * 4KB page: 1 invalidation - * 64KB page: 16 invalidation - * 1MB page: 64 invalidation + * 64KB page: 16 invalidations + * 1MB page: 64 invalidations * because it is set-associative TLB * with 8-way and 64 sets. * 1MB page can be cached in one of all sets. @@ -714,7 +714,7 @@ static int exynos_iommu_domain_init(struct iommu_domain *domain) if (!priv->lv2entcnt) goto err_counter; - /* w/a of System MMU v3.3 to prevent caching 1MiB mapping */ + /* Workaround for System MMU v3.3 to prevent caching 1MiB mapping */ for (i = 0; i < NUM_LV1ENTRIES; i += 8) { priv->pgtable[i + 0] = ZERO_LV2LINK; priv->pgtable[i + 1] = ZERO_LV2LINK; @@ -861,14 +861,14 @@ static sysmmu_pte_t *alloc_lv2entry(struct exynos_iommu_domain *priv, pgtable_flush(sent, sent + 1); /* - * If pretched SLPD is a fault SLPD in zero_l2_table, FLPD cache - * may caches the address of zero_l2_table. 
This function - * replaces the zero_l2_table with new L2 page table to write - * valid mappings. + * If pre-fetched SLPD is a faulty SLPD in zero_l2_table, + * FLPD cache may cache the address of zero_l2_table. This + * function replaces the zero_l2_table with new L2 page table + * to write valid mappings. * Accessing the valid area may cause page fault since FLPD - * cache may still caches zero_l2_table for the valid area - * instead of new L2 page table that have the mapping - * information of the valid area + * cache may still cache zero_l2_table for the valid area + * instead of new L2 page table that has the mapping + * information of the valid area. * Thus any replacement of zero_l2_table with other valid L2 * page table must involve FLPD cache invalidation for System * MMU v3.3. @@ -963,27 +963,27 @@ static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size, /* * *CAUTION* to the I/O virtual memory managers that support exynos-iommu: * - * System MMU v3.x have an advanced logic to improve address translation + * System MMU v3.x has advanced logic to improve address translation * performance with caching more page table entries by a page table walk. - * However, the logic has a bug that caching fault page table entries and System - * MMU reports page fault if the cached fault entry is hit even though the fault - * entry is updated to a valid entry after the entry is cached. - * To prevent caching fault page table entries which may be updated to valid - * entries later, the virtual memory manager should care about the w/a about the - * problem. The followings describe w/a. + * However, the logic has a bug that while caching faulty page table entries, + * System MMU reports page fault if the cached fault entry is hit even though + * the fault entry is updated to a valid entry after the entry is cached. 
+ * To prevent caching faulty page table entries which may be updated to valid + * entries later, the virtual memory manager should care about the workaround + * for the problem. The following describes the workaround. * * Any two consecutive I/O virtual address regions must have a hole of 128KiB - * in maximum to prevent misbehavior of System MMU 3.x. (w/a of h/w bug) + * at maximum to prevent misbehavior of System MMU 3.x (workaround for h/w bug). * - * Precisely, any start address of I/O virtual region must be aligned by + * Precisely, any start address of I/O virtual region must be aligned with * the following sizes for System MMU v3.1 and v3.2. * System MMU v3.1: 128KiB * System MMU v3.2: 256KiB * * Because System MMU v3.3 caches page table entries more aggressively, it needs - * more w/a. - * - Any two consecutive I/O virtual regions must be have a hole of larger size - * than or equal size to 128KiB. + * more workarounds. + * - Any two consecutive I/O virtual regions must have a hole of size larger + * than or equal to 128KiB. * - Start address of an I/O virtual region must be aligned by 128KiB. */ static int exynos_iommu_map(struct iommu_domain *domain, unsigned long l_iova, @@ -1061,7 +1061,8 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain, goto err; } - *ent = ZERO_LV2LINK; /* w/a for h/w bug in Sysmem MMU v3.3 */ + /* workaround for h/w bug in System MMU v3.3 */ + *ent = ZERO_LV2LINK; pgtable_flush(ent, ent + 1); size = SECT_SIZE; goto done; -- GitLab From f63ef69028742b09c1c0896177d555a30ff6cf13 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 11 Aug 2014 13:13:25 +0200 Subject: [PATCH 0185/1868] iommu/vt-d: Don't store SIRTP request Don't store the SIRTP request bit in the register state. It will otherwise become sticky and could request an Interrupt Remap Table Pointer update on each command register write. Found while starting to emulate IR in QEMU, not by observing problems on real hardware. 
Signed-off-by: Jan Kiszka Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 0df41f6264f5..a872874c2565 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -438,8 +438,7 @@ static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode) (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); /* Set interrupt-remapping table pointer */ - iommu->gcmd |= DMA_GCMD_SIRTP; - writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd | DMA_GCMD_SIRTP, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRTPS), sts); -- GitLab From eecbad7d0306b9ee4f621517052913d1adaea753 Mon Sep 17 00:00:00 2001 From: Andreea-Cristina Bernat Date: Mon, 18 Aug 2014 15:20:56 +0300 Subject: [PATCH 0186/1868] iommu: Replace rcu_assign_pointer() with RCU_INIT_POINTER() The use of "rcu_assign_pointer()" is NULLing out the pointer. According to RCU_INIT_POINTER()'s block comment: "1. This use of RCU_INIT_POINTER() is NULLing out the pointer" it is better to use it instead of rcu_assign_pointer() because it has a smaller overhead. 
The following Coccinelle semantic patch was used: @@ @@ - rcu_assign_pointer + RCU_INIT_POINTER (..., NULL) Signed-off-by: Andreea-Cristina Bernat Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 60ab474bfff3..8ed55b0a1ce4 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -247,7 +247,7 @@ int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment, for_each_active_dev_scope(devices, count, index, tmp) if (tmp == &info->dev->dev) { - rcu_assign_pointer(devices[index].dev, NULL); + RCU_INIT_POINTER(devices[index].dev, NULL); synchronize_rcu(); put_device(tmp); return 1; -- GitLab From 23e11811378259831777e8fdc8b9836faeaa72cd Mon Sep 17 00:00:00 2001 From: Vignesh Raman Date: Tue, 5 Aug 2014 18:39:41 +0530 Subject: [PATCH 0187/1868] dma: imx-sdma: use module_platform_driver for SDMA driver Currently there is no module_exit declared in SDMA driver, so that once sdma module is inserted, it's shown with permanent attribute by lsmod, and it can't be removed. Use module_platform_driver to register/unregister SDMA driver and modify SDMA's remove operation, to make SDMA driver possible to be removed. 
Signed-off-by: Jiada Wang Signed-off-by: Vinod Koul --- drivers/dma/imx-sdma.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index f7626e37d0b8..40e65a4df94d 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1603,6 +1603,8 @@ static int __init sdma_probe(struct platform_device *pdev) sdma->dma_device.dev->dma_parms = &sdma->dma_parms; dma_set_max_seg_size(sdma->dma_device.dev, 65535); + platform_set_drvdata(pdev, sdma); + ret = dma_async_device_register(&sdma->dma_device); if (ret) { dev_err(&pdev->dev, "unable to register\n"); @@ -1640,7 +1642,20 @@ static int __init sdma_probe(struct platform_device *pdev) static int sdma_remove(struct platform_device *pdev) { - return -EBUSY; + struct sdma_engine *sdma = platform_get_drvdata(pdev); + struct resource *iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); + int irq = platform_get_irq(pdev, 0); + + dma_async_device_unregister(&sdma->dma_device); + kfree(sdma->script_addrs); + free_irq(irq, sdma); + iounmap(sdma->regs); + release_mem_region(iores->start, resource_size(iores)); + kfree(sdma); + + platform_set_drvdata(pdev, NULL); + dev_info(&pdev->dev, "Removed...\n"); + return 0; } static struct platform_driver sdma_driver = { @@ -1650,13 +1665,10 @@ static struct platform_driver sdma_driver = { }, .id_table = sdma_devtypes, .remove = sdma_remove, + .probe = sdma_probe, }; -static int __init sdma_module_init(void) -{ - return platform_driver_probe(&sdma_driver, sdma_probe); -} -module_init(sdma_module_init); +module_platform_driver(sdma_driver); MODULE_AUTHOR("Sascha Hauer, Pengutronix "); MODULE_DESCRIPTION("i.MX SDMA driver"); -- GitLab From c12fe49726cfebacb47dca5f2bb544c38aa09e6d Mon Sep 17 00:00:00 2001 From: Vignesh Raman Date: Tue, 5 Aug 2014 18:39:42 +0530 Subject: [PATCH 0188/1868] dma: imx-sdma: Adding tasklet_kill() in sdma_remove function. 
Several dma drivers call tasklet_kill() in their remove function. This is done because all running tasklets should be killed on remove. This is missing in the imx sdma driver, so add tasklet_kill() to the sdma_remove function. Signed-off-by: Vignesh Raman Signed-off-by: Vinod Koul --- drivers/dma/imx-sdma.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 40e65a4df94d..c615e88c118a 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1645,12 +1645,19 @@ static int sdma_remove(struct platform_device *pdev) struct sdma_engine *sdma = platform_get_drvdata(pdev); struct resource *iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); int irq = platform_get_irq(pdev, 0); + int i; dma_async_device_unregister(&sdma->dma_device); kfree(sdma->script_addrs); free_irq(irq, sdma); iounmap(sdma->regs); release_mem_region(iores->start, resource_size(iores)); + /* Kill the tasklet */ + for (i = 0; i < MAX_DMA_CHANNELS; i++) { + struct sdma_channel *sdmac = &sdma->channel[i]; + + tasklet_kill(&sdmac->tasklet); + } kfree(sdma); platform_set_drvdata(pdev, NULL); -- GitLab From ddc643630f5deb1995d191719086b64873c67a44 Mon Sep 17 00:00:00 2001 From: Srikanth Thokala Date: Mon, 28 Jul 2014 17:47:48 +0530 Subject: [PATCH 0189/1868] dma: Add Xilinx AXI DMA DT Binding Documentation Device-tree binding documentation of Xilinx DMA Engine Signed-off-by: Srikanth Thokala Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- .../bindings/dma/xilinx/xilinx_dma.txt | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt diff --git a/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt b/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt new file mode 100644 index 000000000000..2291c4098730 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt @@ -0,0 +1,65 @@ +Xilinx AXI DMA engine, it does transfers
between memory and AXI4 stream +target devices. It can be configured to have one channel or two channels. +If configured as two channels, one is to transmit to the device and another +is to receive from the device. + +Required properties: +- compatible: Should be "xlnx,axi-dma-1.00.a" +- #dma-cells: Should be <1>, see "dmas" property below +- reg: Should contain DMA registers location and length. +- dma-channel child node: Should have atleast one channel and can have upto + two channels per device. This node specifies the properties of each + DMA channel (see child node properties below). + +Optional properties: +- xlnx,include-sg: Tells whether configured for Scatter-mode in + the hardware. + +Required child node properties: +- compatible: It should be either "xlnx,axi-dma-mm2s-channel" or + "xlnx,axi-dma-s2mm-channel". +- interrupts: Should contain per channel DMA interrupts. +- xlnx,datawidth: Should contain the stream data width, take values + {32,64...1024}. + +Option child node properties: +- xlnx,include-dre: Tells whether hardware is configured for Data + Realignment Engine. + +Example: +++++++++ + +axi_dma_0: axidma@40400000 { + compatible = "xlnx,axi-dma-1.00.a"; + #dma_cells = <1>; + reg = < 0x40400000 0x10000 >; + dma-channel@40400000 { + compatible = "xlnx,axi-dma-mm2s-channel"; + interrupts = < 0 59 4 >; + xlnx,datawidth = <0x40>; + } ; + dma-channel@40400030 { + compatible = "xlnx,axi-dma-s2mm-channel"; + interrupts = < 0 58 4 >; + xlnx,datawidth = <0x40>; + } ; +} ; + + +* DMA client + +Required properties: +- dmas: a list of <[DMA device phandle] [Channel ID]> pairs, + where Channel ID is '0' for write/tx and '1' for read/rx + channel. 
+- dma-names: a list of DMA channel names, one per "dmas" entry + +Example: +++++++++ + +dmatest_0: dmatest@0 { + compatible ="xlnx,axi-dma-test-1.00.a"; + dmas = <&axi_dma_0 0 + &axi_dma_0 1>; + dma-names = "dma0", "dma1"; +} ; -- GitLab From 29a4bb1431035560b4be3fc5917c5ab8b8141204 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Wed, 13 Aug 2014 13:57:42 +0200 Subject: [PATCH 0190/1868] dma: xilinx: Remove .owner field for driver There is no need to init .owner field. Based on the patch from Peter Griffin "mmc: remove .owner field for drivers using module_platform_driver" "This patch removes the superfluous .owner field for drivers which use the module_platform_driver API, as this is overridden in platform_driver_register anyway." Signed-off-by: Michal Simek Reviewed-by: Levente Kurusa Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_vdma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c index 42a13e8d4607..a6e64767186e 100644 --- a/drivers/dma/xilinx/xilinx_vdma.c +++ b/drivers/dma/xilinx/xilinx_vdma.c @@ -1365,7 +1365,6 @@ static const struct of_device_id xilinx_vdma_of_ids[] = { static struct platform_driver xilinx_vdma_driver = { .driver = { .name = "xilinx-vdma", - .owner = THIS_MODULE, .of_match_table = xilinx_vdma_of_ids, }, .probe = xilinx_vdma_probe, -- GitLab From 5828c60826e9422169b3711aa58a583242864cc8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 31 Jul 2014 18:36:20 +0300 Subject: [PATCH 0191/1868] mtd: ndfc: silence an array underflow static checker warning We check "cs" for array overflows but we don't check for underflows and it upsets the static checkers.
Signed-off-by: Dan Carpenter Signed-off-by: Brian Norris --- drivers/mtd/nand/ndfc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/ndfc.c b/drivers/mtd/nand/ndfc.c index 69eaba690a99..253a644da76a 100644 --- a/drivers/mtd/nand/ndfc.c +++ b/drivers/mtd/nand/ndfc.c @@ -203,7 +203,8 @@ static int ndfc_probe(struct platform_device *ofdev) struct ndfc_controller *ndfc; const __be32 *reg; u32 ccr; - int err, len, cs; + u32 cs; + int err, len; /* Read the reg property to get the chip select */ reg = of_get_property(ofdev->dev.of_node, "reg", &len); -- GitLab From 9b6e5172e363b0c35a6be4d3197f3bcdc789292e Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Thu, 31 Jul 2014 16:31:16 +0200 Subject: [PATCH 0192/1868] mtd: use NULL instead of 0 for an address Use NULL instead of 0 when returning an address. This fixes a sparse warning. Signed-off-by: Martin Kepplinger Signed-off-by: Brian Norris --- drivers/mtd/maps/pcmciamtd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index a3cfad392ed6..af747af5eee9 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -89,7 +89,7 @@ static caddr_t remap_window(struct map_info *map, unsigned long to) if (!pcmcia_dev_present(dev->p_dev)) { pr_debug("device removed\n"); - return 0; + return NULL; } offset = to & ~(dev->win_size-1); -- GitLab From 796fe3648a13b311f5b9a125e2d2532a2ce7c78a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Poggi?= Date: Tue, 29 Jul 2014 15:27:27 +0200 Subject: [PATCH 0193/1868] mtd: atmel_nand: increase chip_delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some nand with 8k page size like Micron MT29F32G08ABAAAWP need more than 20us. 
Signed-off-by: Raphaël Poggi Signed-off-by: Brian Norris --- drivers/mtd/nand/atmel_nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index e321c564ff05..77bd877d8f28 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -2099,7 +2099,7 @@ static int atmel_nand_probe(struct platform_device *pdev) } nand_chip->ecc.mode = host->board.ecc_mode; - nand_chip->chip_delay = 20; /* 20us command delay time */ + nand_chip->chip_delay = 40; /* 40us command delay time */ if (host->board.bus_width_16) /* 16-bit bus width */ nand_chip->options |= NAND_BUSWIDTH_16; -- GitLab From a35571058ec8e7c82dceea90cdecead51674f963 Mon Sep 17 00:00:00 2001 From: "Wu, Josh" Date: Tue, 22 Jul 2014 17:24:18 +0800 Subject: [PATCH 0194/1868] mtd: atmel_nand: add pmecc support for 512, 1k, 4k, 8k page size PMECC can support 512, 1k, 2k, 4k, 8k page size. The driver currently only support 2k page size nand flash. So this patch add support to 512, 1k, 4k and 8k page size nand flash. Signed-off-by: Josh Wu Signed-off-by: Brian Norris --- drivers/mtd/nand/atmel_nand.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index 77bd877d8f28..e4d57ffef07e 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -1174,7 +1174,17 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev, /* set ECC page size and oob layout */ switch (mtd->writesize) { + case 512: + case 1024: case 2048: + case 4096: + case 8192: + if (sector_size > mtd->writesize) { + dev_err(host->dev, "pmecc sector size is bigger than the page size!\n"); + err_no = -EINVAL; + goto err; + } + host->pmecc_degree = (sector_size == 512) ? 
PMECC_GF_DIMENSION_13 : PMECC_GF_DIMENSION_14; host->pmecc_cw_len = (1 << host->pmecc_degree) - 1; @@ -1201,13 +1211,9 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev, nand_chip->ecc.layout = &atmel_pmecc_oobinfo; break; - case 512: - case 1024: - case 4096: - /* TODO */ + default: dev_warn(host->dev, "Unsupported page size for PMECC, use Software ECC\n"); - default: /* page size not handled by HW ECC */ /* switching back to soft ECC */ nand_chip->ecc.mode = NAND_ECC_SOFT; -- GitLab From 7b7d8982f0169d5ac67c6c2877449fb7f6968cac Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 27 Jul 2014 14:31:53 -0700 Subject: [PATCH 0195/1868] mtd: fix linux/mtd/nand.h kernel-doc warning Fix kernel-doc warning in : Warning(..//include/linux/mtd/nand.h:795): No description found for parameter 'ecc' Signed-off-by: Randy Dunlap Cc: David Woodhouse Cc: Brian Norris Cc: linux-mtd@lists.infradead.org Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 3083c53e0270..b7c11991cb09 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -766,6 +766,7 @@ struct nand_chip { * @options: stores various chip bit options * @id_len: The valid length of the @id. * @oobsize: OOB size + * @ecc: ECC correctability and step information from the datasheet. * @ecc.strength_ds: The ECC correctability from the datasheet, same as the * @ecc_strength_ds in nand_chip{}. * @ecc.step_ds: The ECC step required by the @ecc.strength_ds, same as the -- GitLab From f2fabe16b819cdead86fb38c8ab88a0d9c308293 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Sun, 27 Jul 2014 23:56:08 +0200 Subject: [PATCH 0196/1868] mtd: spi-nor: add support for Micron M25PX80 This commit adds the support in the spi-nor driver of the Micron M25PX80 flash, a 8 Mbit SPI flash from Micron. 
Signed-off-by: Thomas Petazzoni Signed-off-by: Brian Norris --- drivers/mtd/spi-nor/spi-nor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index b5ad6bebf5e7..4dc0c8662265 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -611,6 +611,7 @@ const struct spi_device_id spi_nor_ids[] = { { "m25px32-s0", INFO(0x207316, 0, 64 * 1024, 64, SECT_4K) }, { "m25px32-s1", INFO(0x206316, 0, 64 * 1024, 64, SECT_4K) }, { "m25px64", INFO(0x207117, 0, 64 * 1024, 128, 0) }, + { "m25px80", INFO(0x207114, 0, 64 * 1024, 16, 0) }, /* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */ { "w25x10", INFO(0xef3011, 0, 64 * 1024, 2, SECT_4K) }, -- GitLab From 8fb7b9309c41407801958138db978eb38fd80c01 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 28 Jul 2014 21:19:55 +0800 Subject: [PATCH 0197/1868] mtd: atmel_nand: remove redundant dev_err call There is an error message within devm_ioremap_resource already, so remove the dev_err call to avoid a redundant error message.
Signed-off-by: Wei Yongjun Signed-off-by: Brian Norris --- drivers/mtd/nand/atmel_nand.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index e4d57ffef07e..0abc965caedf 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -1148,7 +1148,6 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev, host->ecc = devm_ioremap_resource(&pdev->dev, regs); if (IS_ERR(host->ecc)) { - dev_err(host->dev, "ioremap failed\n"); err_no = PTR_ERR(host->ecc); goto err; } @@ -1156,8 +1155,6 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev, regs_pmerr = platform_get_resource(pdev, IORESOURCE_MEM, 2); host->pmerrloc_base = devm_ioremap_resource(&pdev->dev, regs_pmerr); if (IS_ERR(host->pmerrloc_base)) { - dev_err(host->dev, - "Can not get I/O resource for PMECC ERRLOC controller!\n"); err_no = PTR_ERR(host->pmerrloc_base); goto err; } @@ -1165,7 +1162,6 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev, regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3); host->pmecc_rom_base = devm_ioremap_resource(&pdev->dev, regs_rom); if (IS_ERR(host->pmecc_rom_base)) { - dev_err(host->dev, "Can not get I/O resource for ROM!\n"); err_no = PTR_ERR(host->pmecc_rom_base); goto err; } @@ -1536,10 +1532,8 @@ static int atmel_hw_nand_init_params(struct platform_device *pdev, } host->ecc = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(host->ecc)) { - dev_err(host->dev, "ioremap failed\n"); + if (IS_ERR(host->ecc)) return PTR_ERR(host->ecc); - } /* ECC is calculated for the whole page (1 step) */ nand_chip->ecc.size = mtd->writesize; @@ -2046,7 +2040,6 @@ static int atmel_nand_probe(struct platform_device *pdev) mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); host->io_base = devm_ioremap_resource(&pdev->dev, mem); if (IS_ERR(host->io_base)) { - dev_err(&pdev->dev, "atmel_nand: ioremap resource failed\n"); res = 
PTR_ERR(host->io_base); goto err_nand_ioremap; } -- GitLab From 2902330e7ac16d5962f114d92bb17631e9cb49e9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 11 Jul 2014 11:14:05 +0900 Subject: [PATCH 0198/1868] mtd: denali: avoid using a magic number MAP10 command with '0x2000' data sets up a read-ahead/write access. Signed-off-by: Masahiro Yamada Signed-off-by: Brian Norris --- drivers/mtd/nand/denali.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index 0b071a3136a2..da0fcc224739 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -74,6 +74,7 @@ MODULE_PARM_DESC(onfi_timing_mode, "Overrides default ONFI setting." #define SPARE_ACCESS 0x41 #define MAIN_ACCESS 0x42 #define MAIN_SPARE_ACCESS 0x43 +#define PIPELINE_ACCESS 0x2000 #define DENALI_READ 0 #define DENALI_WRITE 0x100 @@ -765,7 +766,7 @@ static int denali_send_pipeline_cmd(struct denali_nand_info *denali, iowrite32(cmd, denali->flash_mem); } else { index_addr(denali, (uint32_t)cmd, - 0x2000 | op | page_count); + PIPELINE_ACCESS | op | page_count); /* wait for command to be accepted * can always use status0 bit as the -- GitLab From 6f3c0f163103fb225c77b73ca17fc4ecea308103 Mon Sep 17 00:00:00 2001 From: Samarth Parikh Date: Wed, 16 Jul 2014 16:14:37 +0530 Subject: [PATCH 0199/1868] mtd: Fixed checkpatch seq_printf warnings Fixed checkpatch warnings: "WARNING: Prefer seq_puts to seq_printf" This patch is created with reference to the ongoing lkml thread https://lkml.org/lkml/2014/7/15/646 where Andrew Morton wrote: " - puts is presumably faster - puts doesn't go rogue if you accidentally pass it a "%". - this patch would actually make compiled object files few bytes smaller. Perhaps because seq_printf() is a varargs function, forcing the caller to pass args on the stack instead of in registers. 
" Signed-off-by: Samarth Parikh Signed-off-by: Brian Norris --- drivers/mtd/devices/docg3.c | 26 +++++++++++++------------- drivers/mtd/mtdswap.c | 4 ++-- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c index 91a169c44b39..21cc4b66feaa 100644 --- a/drivers/mtd/devices/docg3.c +++ b/drivers/mtd/devices/docg3.c @@ -1697,16 +1697,16 @@ static int dbg_asicmode_show(struct seq_file *s, void *p) switch (mode) { case DOC_ASICMODE_RESET: - pos += seq_printf(s, "reset"); + pos += seq_puts(s, "reset"); break; case DOC_ASICMODE_NORMAL: - pos += seq_printf(s, "normal"); + pos += seq_puts(s, "normal"); break; case DOC_ASICMODE_POWERDOWN: - pos += seq_printf(s, "powerdown"); + pos += seq_puts(s, "powerdown"); break; } - pos += seq_printf(s, ")\n"); + pos += seq_puts(s, ")\n"); return pos; } DEBUGFS_RO_ATTR(asic_mode, dbg_asicmode_show); @@ -1745,22 +1745,22 @@ static int dbg_protection_show(struct seq_file *s, void *p) pos += seq_printf(s, "Protection = 0x%02x (", protect); if (protect & DOC_PROTECT_FOUNDRY_OTP_LOCK) - pos += seq_printf(s, "FOUNDRY_OTP_LOCK,"); + pos += seq_puts(s, "FOUNDRY_OTP_LOCK,"); if (protect & DOC_PROTECT_CUSTOMER_OTP_LOCK) - pos += seq_printf(s, "CUSTOMER_OTP_LOCK,"); + pos += seq_puts(s, "CUSTOMER_OTP_LOCK,"); if (protect & DOC_PROTECT_LOCK_INPUT) - pos += seq_printf(s, "LOCK_INPUT,"); + pos += seq_puts(s, "LOCK_INPUT,"); if (protect & DOC_PROTECT_STICKY_LOCK) - pos += seq_printf(s, "STICKY_LOCK,"); + pos += seq_puts(s, "STICKY_LOCK,"); if (protect & DOC_PROTECT_PROTECTION_ENABLED) - pos += seq_printf(s, "PROTECTION ON,"); + pos += seq_puts(s, "PROTECTION ON,"); if (protect & DOC_PROTECT_IPL_DOWNLOAD_LOCK) - pos += seq_printf(s, "IPL_DOWNLOAD_LOCK,"); + pos += seq_puts(s, "IPL_DOWNLOAD_LOCK,"); if (protect & DOC_PROTECT_PROTECTION_ERROR) - pos += seq_printf(s, "PROTECT_ERR,"); + pos += seq_puts(s, "PROTECT_ERR,"); else - pos += seq_printf(s, "NO_PROTECT_ERR"); - pos += 
seq_printf(s, ")\n"); + pos += seq_puts(s, "NO_PROTECT_ERR"); + pos += seq_puts(s, ")\n"); pos += seq_printf(s, "DPS0 = 0x%02x : " "Protected area [0x%x - 0x%x] : OTP=%d, READ=%d, " diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 8b33b26eb12b..0ec96cd5dc78 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -1287,7 +1287,7 @@ static int mtdswap_show(struct seq_file *s, void *data) seq_printf(s, "total erasures: %lu\n", sum); - seq_printf(s, "\n"); + seq_puts(s, "\n"); seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count); seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count); @@ -1296,7 +1296,7 @@ static int mtdswap_show(struct seq_file *s, void *data) seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count); seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count); - seq_printf(s, "\n"); + seq_puts(s, "\n"); seq_printf(s, "total pages: %u\n", pages); seq_printf(s, "pages mapped: %u\n", mapped); -- GitLab From 57d3a9a89a0645f3597561e214f8d6852a2c56b4 Mon Sep 17 00:00:00 2001 From: White Ding Date: Thu, 24 Jul 2014 00:10:45 +0800 Subject: [PATCH 0200/1868] mtd: nand: fix nand_lock/unlock() function Do nand reset before write protect check. If we want to check the WP# low or high through STATUS READ and check bit 7, we must reset the device, other operation (eg.erase/program a locked block) can also clear the bit 7 of status register. As we know the status register can be refreshed, if we do some operation to trigger it, for example if we do erase/program operation to one block that is locked, then READ STATUS, the bit 7 of READ STATUS will be 0 indicate the device in write protect, then if we do erase/program operation to another block that is unlocked, the bit 7 of READ STATUS will be 1 indicate the device is not write protect. 
Suppose we checked the bit 7 of READ STATUS is 0 then judge the WP# is low (write protect), but in this case the WP# maybe high if we do erase/program operation to a locked block, so we must reset the device if we want to check the WP# low or high through STATUS READ and check bit 7. Signed-off-by: White Ding Signed-off-by: Brian Norris --- drivers/mtd/nand/nand_base.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index d8cdf06343fb..1a27c2da29ff 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -982,6 +982,15 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) chip->select_chip(mtd, chipnr); + /* + * Reset the chip. + * If we want to check the WP through READ STATUS and check the bit 7 + * we must reset the chip + * some operation can also clear the bit 7 of status register + * eg. erase/program a locked block + */ + chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); + /* Check, if it is write protected */ if (nand_check_wp(mtd)) { pr_debug("%s: device is write protected!\n", @@ -1032,6 +1041,15 @@ int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) chip->select_chip(mtd, chipnr); + /* + * Reset the chip. + * If we want to check the WP through READ STATUS and check the bit 7 + * we must reset the chip + * some operation can also clear the bit 7 of status register + * eg. erase/program a locked block + */ + chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); + /* Check, if it is write protected */ if (nand_check_wp(mtd)) { pr_debug("%s: device is write protected!\n", -- GitLab From 36c6a7ac74044b8025488c018279115bb3c32eb0 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:06:19 -0700 Subject: [PATCH 0201/1868] mtd: cfi_cmdset_0002: allow retry/timeout loop to exit The variable 'retries' is never modified, so if the reset operation never is going to complete, we'll get stuck in an infinite loop. 
It looks like the intention was to decrement 'retries' on every loop. Untested. Caught by Coverity. Signed-off-by: Brian Norris --- drivers/mtd/chips/cfi_cmdset_0002.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 5a4bfe33112a..6da141af9cba 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -2029,6 +2029,8 @@ static int cfi_amdstd_panic_wait(struct map_info *map, struct flchip *chip, udelay(1); } + + retries--; } /* the chip never became ready */ -- GitLab From 0c2b4e21444d0e274e91fc7db85caddb30988853 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:06:27 -0700 Subject: [PATCH 0202/1868] mtd: correct upper bounds check for mtd_*() APIs When checking the upper boundary (i.e., whether an address is higher than the maximum size of the MTD), we should be doing an inclusive check (greater or equal). For instance, an address of 16MB (0x1000000) on a 16MB device is invalid. The strengthening of this bounds check is redundant for those which already have an address+length check and ensure that the length is non-zero, but let's just fix them all, for completeness.
Signed-off-by: Brian Norris --- drivers/mtd/mtdcore.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index e4831b4159db..c1015173f2d9 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -778,7 +778,7 @@ EXPORT_SYMBOL_GPL(__put_mtd_device); */ int mtd_erase(struct mtd_info *mtd, struct erase_info *instr) { - if (instr->addr > mtd->size || instr->len > mtd->size - instr->addr) + if (instr->addr >= mtd->size || instr->len > mtd->size - instr->addr) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; @@ -804,7 +804,7 @@ int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, *phys = 0; if (!mtd->_point) return -EOPNOTSUPP; - if (from < 0 || from > mtd->size || len > mtd->size - from) + if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; @@ -817,7 +817,7 @@ int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len) { if (!mtd->_point) return -EOPNOTSUPP; - if (from < 0 || from > mtd->size || len > mtd->size - from) + if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; @@ -835,7 +835,7 @@ unsigned long mtd_get_unmapped_area(struct mtd_info *mtd, unsigned long len, { if (!mtd->_get_unmapped_area) return -EOPNOTSUPP; - if (offset > mtd->size || len > mtd->size - offset) + if (offset >= mtd->size || len > mtd->size - offset) return -EINVAL; return mtd->_get_unmapped_area(mtd, len, offset, flags); } @@ -846,7 +846,7 @@ int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, { int ret_code; *retlen = 0; - if (from < 0 || from > mtd->size || len > mtd->size - from) + if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; @@ -869,7 +869,7 @@ int mtd_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { *retlen = 0; - if (to < 0 || to > mtd->size || 
len > mtd->size - to) + if (to < 0 || to >= mtd->size || len > mtd->size - to) return -EINVAL; if (!mtd->_write || !(mtd->flags & MTD_WRITEABLE)) return -EROFS; @@ -892,7 +892,7 @@ int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, *retlen = 0; if (!mtd->_panic_write) return -EOPNOTSUPP; - if (to < 0 || to > mtd->size || len > mtd->size - to) + if (to < 0 || to >= mtd->size || len > mtd->size - to) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; @@ -1011,7 +1011,7 @@ int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { if (!mtd->_lock) return -EOPNOTSUPP; - if (ofs < 0 || ofs > mtd->size || len > mtd->size - ofs) + if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; @@ -1023,7 +1023,7 @@ int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { if (!mtd->_unlock) return -EOPNOTSUPP; - if (ofs < 0 || ofs > mtd->size || len > mtd->size - ofs) + if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; @@ -1035,7 +1035,7 @@ int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) { if (!mtd->_is_locked) return -EOPNOTSUPP; - if (ofs < 0 || ofs > mtd->size || len > mtd->size - ofs) + if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; @@ -1045,7 +1045,7 @@ EXPORT_SYMBOL_GPL(mtd_is_locked); int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs) { - if (ofs < 0 || ofs > mtd->size) + if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!mtd->_block_isreserved) return 0; @@ -1055,7 +1055,7 @@ EXPORT_SYMBOL_GPL(mtd_block_isreserved); int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) { - if (ofs < 0 || ofs > mtd->size) + if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!mtd->_block_isbad) return 0; @@ -1067,7 +1067,7 @@ int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs) { if (!mtd->_block_markbad) return -EOPNOTSUPP; - if (ofs < 0 || ofs > mtd->size) + if (ofs < 0 || 
ofs >= mtd->size) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; -- GitLab From f7f0d358f5f2f1133b5a14337028ddab848cd74e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:06:39 -0700 Subject: [PATCH 0203/1868] mtd: sm_ftl: initialize error code There is one theoretical case that could fall through to using an uninitialized value as the return code. Let's give it a value of 0. Untested. Caught by Coverity. Signed-off-by: Brian Norris --- drivers/mtd/sm_ftl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c index cf49c22673b9..c23184a47fc4 100644 --- a/drivers/mtd/sm_ftl.c +++ b/drivers/mtd/sm_ftl.c @@ -1058,7 +1058,7 @@ static int sm_write(struct mtd_blktrans_dev *dev, { struct sm_ftl *ftl = dev->priv; struct ftl_zone *zone; - int error, zone_num, block, boffset; + int error = 0, zone_num, block, boffset; BUG_ON(ftl->readonly); sm_break_offset(ftl, sec_no << 9, &zone_num, &block, &boffset); -- GitLab From 5e47212831ac565993d21ebd36216d98f2b58f30 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:06:47 -0700 Subject: [PATCH 0204/1868] mtd: remove dead non-char logic MTD used to allow compiling out character device support. This was dropped in the following commit, but some of the accompanying logic was never dropped: commit 660685d9d1b4730f0b5ca97fa95f272f99c63bce Author: Artem Bityutskiy Date: Thu Mar 14 13:27:40 2013 +0200 mtd: merge mtdchar module with mtdcore The weird logic was flagged by Coverity. 
Signed-off-by: Brian Norris Cc: Artem Bityutskiy --- drivers/mtd/mtdcore.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index c1015173f2d9..4c611871d7e6 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -105,12 +105,11 @@ static LIST_HEAD(mtd_notifiers); */ static void mtd_release(struct device *dev) { - struct mtd_info __maybe_unused *mtd = dev_get_drvdata(dev); + struct mtd_info *mtd = dev_get_drvdata(dev); dev_t index = MTD_DEVT(mtd->index); - /* remove /dev/mtdXro node if needed */ - if (index) - device_destroy(&mtd_class, index + 1); + /* remove /dev/mtdXro node */ + device_destroy(&mtd_class, index + 1); } static int mtd_cls_suspend(struct device *dev, pm_message_t state) @@ -442,10 +441,8 @@ int add_mtd_device(struct mtd_info *mtd) if (device_register(&mtd->dev) != 0) goto fail_added; - if (MTD_DEVT(i)) - device_create(&mtd_class, mtd->dev.parent, - MTD_DEVT(i) + 1, - NULL, "mtd%dro", i); + device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL, + "mtd%dro", i); pr_debug("mtd: Giving out device %d to %s\n", i, mtd->name); /* No need to get a refcount on the module containing -- GitLab From 8c3f3f1d7941bcb25590b784f84accd7dcb44ba3 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:07:02 -0700 Subject: [PATCH 0205/1868] mtd: mtdswap: fix integer overflow Caught by Coverity. 
Signed-off-by: Brian Norris --- drivers/mtd/mtdswap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 0ec96cd5dc78..48cf6f98df44 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -1474,7 +1474,7 @@ static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) } eblocks = mtd_div_by_eb(use_size, mtd); - use_size = eblocks * mtd->erasesize; + use_size = (uint64_t)eblocks * mtd->erasesize; bad_blocks = mtdswap_badblocks(mtd, use_size); eavailable = eblocks - bad_blocks; -- GitLab From 1001ff7a4f64f3f4264e69d3ed70ff428f627e01 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:07:12 -0700 Subject: [PATCH 0206/1868] mtd: tests: fix integer overflow issues These multiplications are done with 32-bit arithmetic, then converted to 64-bit. We should widen the integers first to prevent overflow. This could be a problem for large (>4GB) MTD's. Detected by Coverity. Signed-off-by: Brian Norris Cc: Akinobu Mita --- drivers/mtd/tests/mtd_test.c | 4 ++-- drivers/mtd/tests/nandbiterrs.c | 2 +- drivers/mtd/tests/oobtest.c | 8 ++++---- drivers/mtd/tests/pagetest.c | 4 ++-- drivers/mtd/tests/readtest.c | 2 +- drivers/mtd/tests/speedtest.c | 14 +++++++------- drivers/mtd/tests/subpagetest.c | 10 +++++----- 7 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/mtd/tests/mtd_test.c b/drivers/mtd/tests/mtd_test.c index 111ee46a7428..34736bbcc07b 100644 --- a/drivers/mtd/tests/mtd_test.c +++ b/drivers/mtd/tests/mtd_test.c @@ -10,7 +10,7 @@ int mtdtest_erase_eraseblock(struct mtd_info *mtd, unsigned int ebnum) { int err; struct erase_info ei; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; memset(&ei, 0, sizeof(struct erase_info)); ei.mtd = mtd; @@ -33,7 +33,7 @@ int mtdtest_erase_eraseblock(struct mtd_info *mtd, unsigned int ebnum) static int is_block_bad(struct mtd_info *mtd, unsigned int ebnum) { int ret; - 
loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; ret = mtd_block_isbad(mtd, addr); if (ret) diff --git a/drivers/mtd/tests/nandbiterrs.c b/drivers/mtd/tests/nandbiterrs.c index 6f976159611f..273f7e553954 100644 --- a/drivers/mtd/tests/nandbiterrs.c +++ b/drivers/mtd/tests/nandbiterrs.c @@ -364,7 +364,7 @@ static int __init mtd_nandbiterrs_init(void) pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize); - offset = page_offset * mtd->writesize; + offset = (loff_t)page_offset * mtd->writesize; eraseblock = mtd_div_by_eb(offset, mtd); pr_info("Using page=%u, offset=%llu, eraseblock=%u\n", diff --git a/drivers/mtd/tests/oobtest.c b/drivers/mtd/tests/oobtest.c index f19ab1acde1f..dc4f9602b97e 100644 --- a/drivers/mtd/tests/oobtest.c +++ b/drivers/mtd/tests/oobtest.c @@ -120,7 +120,7 @@ static int verify_eraseblock(int ebnum) int i; struct mtd_oob_ops ops; int err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; prandom_bytes_state(&rnd_state, writebuf, use_len_max * pgcnt); for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) { @@ -214,7 +214,7 @@ static int verify_eraseblock_in_one_go(int ebnum) { struct mtd_oob_ops ops; int err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; size_t len = mtd->ecclayout->oobavail * pgcnt; prandom_bytes_state(&rnd_state, writebuf, len); @@ -568,7 +568,7 @@ static int __init mtd_oobtest_init(void) size_t sz = mtd->ecclayout->oobavail; if (bbt[i] || bbt[i + 1]) continue; - addr = (i + 1) * mtd->erasesize - mtd->writesize; + addr = (loff_t)(i + 1) * mtd->erasesize - mtd->writesize; prandom_bytes_state(&rnd_state, writebuf, sz * cnt); for (pg = 0; pg < cnt; ++pg) { ops.mode = MTD_OPS_AUTO_OOB; @@ -598,7 +598,7 @@ static int __init mtd_oobtest_init(void) continue; prandom_bytes_state(&rnd_state, writebuf, mtd->ecclayout->oobavail * 2); - addr = (i + 1) * mtd->erasesize - mtd->writesize; + addr = 
(loff_t)(i + 1) * mtd->erasesize - mtd->writesize; ops.mode = MTD_OPS_AUTO_OOB; ops.len = 0; ops.retlen = 0; diff --git a/drivers/mtd/tests/pagetest.c b/drivers/mtd/tests/pagetest.c index ed2d3f656fd2..88296e888e9d 100644 --- a/drivers/mtd/tests/pagetest.c +++ b/drivers/mtd/tests/pagetest.c @@ -52,7 +52,7 @@ static struct rnd_state rnd_state; static int write_eraseblock(int ebnum) { - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; prandom_bytes_state(&rnd_state, writebuf, mtd->erasesize); cond_resched(); @@ -64,7 +64,7 @@ static int verify_eraseblock(int ebnum) uint32_t j; int err = 0, i; loff_t addr0, addrn; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; addr0 = 0; for (i = 0; i < ebcnt && bbt[i]; ++i) diff --git a/drivers/mtd/tests/readtest.c b/drivers/mtd/tests/readtest.c index 626e66d0f7e7..a54cf1511114 100644 --- a/drivers/mtd/tests/readtest.c +++ b/drivers/mtd/tests/readtest.c @@ -47,7 +47,7 @@ static int pgcnt; static int read_eraseblock_by_page(int ebnum) { int i, ret, err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; void *buf = iobuf; void *oobbuf = iobuf1; diff --git a/drivers/mtd/tests/speedtest.c b/drivers/mtd/tests/speedtest.c index 87ff6a29f84e..5ee9f7021020 100644 --- a/drivers/mtd/tests/speedtest.c +++ b/drivers/mtd/tests/speedtest.c @@ -55,7 +55,7 @@ static int multiblock_erase(int ebnum, int blocks) { int err; struct erase_info ei; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; memset(&ei, 0, sizeof(struct erase_info)); ei.mtd = mtd; @@ -80,7 +80,7 @@ static int multiblock_erase(int ebnum, int blocks) static int write_eraseblock(int ebnum) { - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; return mtdtest_write(mtd, addr, mtd->erasesize, iobuf); } @@ -88,7 +88,7 @@ static int write_eraseblock(int ebnum) static int write_eraseblock_by_page(int 
ebnum) { int i, err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; void *buf = iobuf; for (i = 0; i < pgcnt; i++) { @@ -106,7 +106,7 @@ static int write_eraseblock_by_2pages(int ebnum) { size_t sz = pgsize * 2; int i, n = pgcnt / 2, err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; void *buf = iobuf; for (i = 0; i < n; i++) { @@ -124,7 +124,7 @@ static int write_eraseblock_by_2pages(int ebnum) static int read_eraseblock(int ebnum) { - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; return mtdtest_read(mtd, addr, mtd->erasesize, iobuf); } @@ -132,7 +132,7 @@ static int read_eraseblock(int ebnum) static int read_eraseblock_by_page(int ebnum) { int i, err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; void *buf = iobuf; for (i = 0; i < pgcnt; i++) { @@ -150,7 +150,7 @@ static int read_eraseblock_by_2pages(int ebnum) { size_t sz = pgsize * 2; int i, n = pgcnt / 2, err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; void *buf = iobuf; for (i = 0; i < n; i++) { diff --git a/drivers/mtd/tests/subpagetest.c b/drivers/mtd/tests/subpagetest.c index a876371ad410..7b59ef522d5e 100644 --- a/drivers/mtd/tests/subpagetest.c +++ b/drivers/mtd/tests/subpagetest.c @@ -57,7 +57,7 @@ static int write_eraseblock(int ebnum) { size_t written; int err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; prandom_bytes_state(&rnd_state, writebuf, subpgsize); err = mtd_write(mtd, addr, subpgsize, &written, writebuf); @@ -92,7 +92,7 @@ static int write_eraseblock2(int ebnum) { size_t written; int err = 0, k; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; for (k = 1; k < 33; ++k) { if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize) @@ -131,7 +131,7 @@ static int verify_eraseblock(int 
ebnum) { size_t read; int err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; prandom_bytes_state(&rnd_state, writebuf, subpgsize); clear_data(readbuf, subpgsize); @@ -192,7 +192,7 @@ static int verify_eraseblock2(int ebnum) { size_t read; int err = 0, k; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; for (k = 1; k < 33; ++k) { if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize) @@ -227,7 +227,7 @@ static int verify_eraseblock_ff(int ebnum) uint32_t j; size_t read; int err = 0; - loff_t addr = ebnum * mtd->erasesize; + loff_t addr = (loff_t)ebnum * mtd->erasesize; memset(writebuf, 0xff, subpgsize); for (j = 0; j < mtd->erasesize / subpgsize; ++j) { -- GitLab From 31f754628cbb12c983600f22d9f0fed50dfe2134 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:07:22 -0700 Subject: [PATCH 0207/1868] mtd: use __packed shorthand Signed-off-by: Brian Norris --- drivers/mtd/mtdswap.c | 2 +- drivers/mtd/nand/sm_common.h | 2 +- include/linux/mtd/cfi.h | 22 +++++++++++----------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 48cf6f98df44..fc8b3d16cce7 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -145,7 +145,7 @@ struct mtdswap_dev { struct mtdswap_oobdata { __le16 magic; __le32 count; -} __attribute__((packed)); +} __packed; #define MTDSWAP_MAGIC_CLEAN 0x2095 #define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1) diff --git a/drivers/mtd/nand/sm_common.h b/drivers/mtd/nand/sm_common.h index 00f4a83359b2..d3e028e58b0f 100644 --- a/drivers/mtd/nand/sm_common.h +++ b/drivers/mtd/nand/sm_common.h @@ -18,7 +18,7 @@ struct sm_oob { uint8_t ecc2[3]; uint8_t lba_copy2[2]; uint8_t ecc1[3]; -} __attribute__((packed)); +} __packed; /* one sector is always 512 bytes, but it can consist of two nand pages */ diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 
37ef6b194089..299d7d31fe53 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -153,7 +153,7 @@ struct cfi_ident { uint16_t MaxBufWriteSize; uint8_t NumEraseRegions; uint32_t EraseRegionInfo[0]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; /* Extended Query Structure for both PRI and ALT */ @@ -161,7 +161,7 @@ struct cfi_extquery { uint8_t pri[3]; uint8_t MajorVersion; uint8_t MinorVersion; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for Intel/Sharp Extended Command Set (0x0001) */ @@ -180,7 +180,7 @@ struct cfi_pri_intelext { uint8_t FactProtRegSize; uint8_t UserProtRegSize; uint8_t extra[0]; -} __attribute__((packed)); +} __packed; struct cfi_intelext_otpinfo { uint32_t ProtRegAddr; @@ -188,7 +188,7 @@ struct cfi_intelext_otpinfo { uint8_t FactProtRegSize; uint16_t UserGroups; uint8_t UserProtRegSize; -} __attribute__((packed)); +} __packed; struct cfi_intelext_blockinfo { uint16_t NumIdentBlocks; @@ -196,7 +196,7 @@ struct cfi_intelext_blockinfo { uint16_t MinBlockEraseCycles; uint8_t BitsPerCell; uint8_t BlockCap; -} __attribute__((packed)); +} __packed; struct cfi_intelext_regioninfo { uint16_t NumIdentPartitions; @@ -205,7 +205,7 @@ struct cfi_intelext_regioninfo { uint8_t NumOpAllowedSimEraMode; uint8_t NumBlockTypes; struct cfi_intelext_blockinfo BlockTypes[1]; -} __attribute__((packed)); +} __packed; struct cfi_intelext_programming_regioninfo { uint8_t ProgRegShift; @@ -214,7 +214,7 @@ struct cfi_intelext_programming_regioninfo { uint8_t Reserved2; uint8_t ControlInvalid; uint8_t Reserved3; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for AMD/Fujitsu Extended Command Set (0x0002) */ @@ -233,7 +233,7 @@ struct cfi_pri_amdstd { uint8_t VppMin; uint8_t VppMax; uint8_t TopBottom; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for Atmel chips (command set 0x0002) */ @@ -245,18 +245,18 @@ struct cfi_pri_atmel { uint8_t BottomBoot; uint8_t BurstMode; uint8_t PageMode; -} 
__attribute__((packed)); +} __packed; struct cfi_pri_query { uint8_t NumFields; uint32_t ProtField[1]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; struct cfi_bri_query { uint8_t PageModeReadCap; uint8_t NumFields; uint32_t ConfField[1]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; #define P_ID_NONE 0x0000 #define P_ID_INTEL_EXT 0x0001 -- GitLab From c115add9d073752d38f6517882dfeafe76fc4458 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:07:31 -0700 Subject: [PATCH 0208/1868] mtd: nand: denali: set proper error code on timeout The condition "if (irq_status == 0)" already ensures that one half of the ternary ?: is dead. I think this should probably actually be a FAIL, not a PASS. Caught by Coverity. Signed-off-by: Brian Norris Cc: Jamie Iles --- drivers/mtd/nand/denali.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index da0fcc224739..4885a0f573cd 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -1062,9 +1062,7 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *chip, dev_err(denali->dev, "timeout on write_page (type = %d)\n", raw_xfer); - denali->status = - (irq_status & INTR_STATUS__PROGRAM_FAIL) ? - NAND_STATUS_FAIL : PASS; + denali->status = NAND_STATUS_FAIL; } denali_enable_dma(denali, false); -- GitLab From b033e1aac9afd314add799b6cd2a5489f892757f Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:07:44 -0700 Subject: [PATCH 0209/1868] mtd: nandsim: fix integer widening This multiplication should be done in 64-bit, not 32-bit. Caught by Coverity. 
Signed-off-by: Brian Norris --- drivers/mtd/nand/nandsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 4f0d83648e5a..7dc1dd28d896 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -827,7 +827,7 @@ static int parse_badblocks(struct nandsim *ns, struct mtd_info *mtd) NS_ERR("invalid badblocks.\n"); return -EINVAL; } - offset = erase_block_no * ns->geom.secsz; + offset = (loff_t)erase_block_no * ns->geom.secsz; if (mtd_block_markbad(mtd, offset)) { NS_ERR("invalid badblocks.\n"); return -EINVAL; -- GitLab From 7a6f43958a53020f85818ff5c895623e88781fd6 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:07:56 -0700 Subject: [PATCH 0210/1868] mtd: maps: solutionengine: drop excess dependency Already depends on SOLUTION_ENGINE, so we don't need the SUPERH dependency too. Signed-off-by: Brian Norris --- drivers/mtd/maps/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 21b2874a303b..ba801d2c6dcc 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -249,7 +249,7 @@ config MTD_CFI_FLAGADM config MTD_SOLUTIONENGINE tristate "CFI Flash device mapped on Hitachi SolutionEngine" - depends on SUPERH && SOLUTION_ENGINE && MTD_CFI && MTD_REDBOOT_PARTS + depends on SOLUTION_ENGINE && MTD_CFI && MTD_REDBOOT_PARTS help This enables access to the flash chips on the Hitachi SolutionEngine and similar boards. Say 'Y' if you are building a kernel for such a board. -- GitLab From 537ab1bd47d6518e8a40207a80dd0c2c4bc43aed Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:08:03 -0700 Subject: [PATCH 0211/1868] mtd: nand: fix integer widening problems chip->pagebuf is a 32-bit type (int), so the shift will only be applied as 32-bit. Fix this for 64-bit safety. Caught by Coverity. 
Signed-off-by: Brian Norris --- drivers/mtd/nand/nand_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 1a27c2da29ff..ae6e7c47f8e2 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2409,8 +2409,8 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, blockmask = (1 << (chip->phys_erase_shift - chip->page_shift)) - 1; /* Invalidate the page cache, when we write to the cached page */ - if (to <= (chip->pagebuf << chip->page_shift) && - (chip->pagebuf << chip->page_shift) < (to + ops->len)) + if (to <= ((loff_t)chip->pagebuf << chip->page_shift) && + ((loff_t)chip->pagebuf << chip->page_shift) < (to + ops->len)) chip->pagebuf = -1; /* Don't allow multipage oob writes with offset */ -- GitLab From 1cc8d8413327a684cd5e93cd52ececb0223bb40b Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:08:13 -0700 Subject: [PATCH 0212/1868] mtd: terminate user-provided string Noticed by Coverity as a potential security issue. Signed-off-by: Brian Norris --- drivers/mtd/mtdchar.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index a0f54e80670c..53563955931b 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -549,6 +549,9 @@ static int mtdchar_blkpg_ioctl(struct mtd_info *mtd, if (mtd_is_partition(mtd)) return -EINVAL; + /* Sanitize user input */ + p.devname[BLKPG_DEVNAMELTH - 1] = '\0'; + return mtd_add_partition(mtd, p.devname, p.start, p.length); case BLKPG_DEL_PARTITION: -- GitLab From ff0a215438cf7be0a652cb3457f562539bd40b22 Mon Sep 17 00:00:00 2001 From: "Wu, Josh" Date: Tue, 5 Aug 2014 18:38:52 +0800 Subject: [PATCH 0213/1868] mtd: atmel_nand: NFC: fix mtd_nandbiterrs.ko test fail when using sram write When NFC sram write is enabled, the mtd_nandbiterrs.ko test fails.
In the driver's nfc_sram_write_page(), if ops->mode is equal to MTD_OPS_RAW, the driver assumes that the data buffer contains one page of data followed by its oob data, and it writes both the page data and the oob data to nand. But this implementation is wrong, since the data buffer doesn't contain the oob data to write. We should write chip->oob_poi to nand's oob. So this patch fixes it by writing the oob data from chip->oob_poi. Signed-off-by: Josh Wu Signed-off-by: Brian Norris --- drivers/mtd/nand/atmel_nand.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index 0abc965caedf..9c5f717bda54 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -1907,15 +1907,7 @@ static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip, if (offset || (data_len < mtd->writesize)) return -EINVAL; - cfg = nfc_readl(host->nfc->hsmc_regs, CFG); len = mtd->writesize; - - if (unlikely(raw)) { - len += mtd->oobsize; - nfc_writel(host->nfc->hsmc_regs, CFG, cfg | NFC_CFG_WSPARE); - } else - nfc_writel(host->nfc->hsmc_regs, CFG, cfg & ~NFC_CFG_WSPARE); - /* Copy page data to sram that will write to nand via NFC */ if (use_dma) { if (atmel_nand_dma_op(mtd, (void *)buf, len, 0) != 0) @@ -1925,6 +1917,15 @@ static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip, memcpy32_toio(sram, buf, len); } + cfg = nfc_readl(host->nfc->hsmc_regs, CFG); + if (unlikely(raw) && oob_required) { + memcpy32_toio(sram + len, chip->oob_poi, mtd->oobsize); + len += mtd->oobsize; + nfc_writel(host->nfc->hsmc_regs, CFG, cfg | NFC_CFG_WSPARE); + } else { + nfc_writel(host->nfc->hsmc_regs, CFG, cfg & ~NFC_CFG_WSPARE); + } + if (chip->ecc.mode == NAND_ECC_HW && host->has_pmecc) /* * When use NFC sram, need set up PMECC before send -- GitLab From bd8898db3e03147d9d7ddd48876fb3f3bcbab6c1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 9 Aug 2014 19:07:53 +0200
Subject: [PATCH 0214/1868] mtd: nand: Use ULL-suffix for big u64 constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/mtd/nand/nand_timings.c:45: warning: integer constant is too large for ‘long’ type [ Editorial note: This is a false warning. Looking at ISO draft N1124 (this is approximately C11, the first PDF I had lying around), section 6.4.4.1 (statement 5): "The type of an integer constant is the first of the corresponding list in which its value can be represented." So this should not be an overflow, and any toolchain that says so (e.g., GCC 4.4) is buggy. -Brian ] Signed-off-by: Geert Uytterhoeven Signed-off-by: Brian Norris --- drivers/mtd/nand/nand_timings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c index 8b36253420fa..e81470a8ac67 100644 --- a/drivers/mtd/nand/nand_timings.c +++ b/drivers/mtd/nand/nand_timings.c @@ -42,7 +42,7 @@ static const struct nand_sdr_timings onfi_sdr_timings[] = { .tRHZ_max = 200000, .tRLOH_min = 0, .tRP_min = 50000, - .tRST_max = 250000000000, + .tRST_max = 250000000000ULL, .tWB_max = 200000, .tRR_min = 40000, .tWC_min = 100000, -- GitLab From 02f8a24e7b1c253ee37edc684200c11300de23f9 Mon Sep 17 00:00:00 2001 From: Aaron Wu Date: Thu, 7 Aug 2014 11:43:49 +0800 Subject: [PATCH 0215/1868] mtd: gpio_flash: handle case where offset + len exceeds the window size Fix the bug in handling gpio flash read/write when offset + len from MTD exceeds the window size Signed-off-by: Aaron Wu [Brian: made some commentary edits. Also note that the BUG_ON() was provably false for all non-negative inputs (since x % y <= x), so we dropped it.] 
Signed-off-by: Brian Norris --- drivers/mtd/maps/gpio-addr-flash.c | 42 ++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/mtd/maps/gpio-addr-flash.c b/drivers/mtd/maps/gpio-addr-flash.c index a4c477b9fdd6..2fb346091af2 100644 --- a/drivers/mtd/maps/gpio-addr-flash.c +++ b/drivers/mtd/maps/gpio-addr-flash.c @@ -99,22 +99,28 @@ static map_word gf_read(struct map_info *map, unsigned long ofs) * @from: flash offset to copy from * @len: how much to copy * - * We rely on the MTD layer to chunk up copies such that a single request here - * will not cross a window size. This allows us to only wiggle the GPIOs once - * before falling back to a normal memcpy. Reading the higher layer code shows - * that this is indeed the case, but add a BUG_ON() to future proof. + * The "from" region may straddle more than one window, so toggle the GPIOs for + * each window region before reading its data. */ static void gf_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len) { struct async_state *state = gf_map_info_to_state(map); - gf_set_gpios(state, from); + int this_len; - /* BUG if operation crosses the win_size */ - BUG_ON(!((from + len) % state->win_size <= (from + len))); + while (len) { + if ((from % state->win_size) + len > state->win_size) + this_len = state->win_size - (from % state->win_size); + else + this_len = len; - /* operation does not cross the win_size, so one shot it */ - memcpy_fromio(to, map->virt + (from % state->win_size), len); + gf_set_gpios(state, from); + memcpy_fromio(to, map->virt + (from % state->win_size), + this_len); + len -= this_len; + from += this_len; + to += this_len; + } } /** @@ -147,13 +153,21 @@ static void gf_copy_to(struct map_info *map, unsigned long to, { struct async_state *state = gf_map_info_to_state(map); - gf_set_gpios(state, to); + int this_len; + + while (len) { + if ((to % state->win_size) + len > state->win_size) + this_len = state->win_size - (to % 
state->win_size); + else + this_len = len; - /* BUG if operation crosses the win_size */ - BUG_ON(!((to + len) % state->win_size <= (to + len))); + gf_set_gpios(state, to); + memcpy_toio(map->virt + (to % state->win_size), from, len); - /* operation does not cross the win_size, so one shot it */ - memcpy_toio(map->virt + (to % state->win_size), from, len); + len -= this_len; + to += this_len; + from += this_len; + } } static const char * const part_probe_types[] = { -- GitLab From ab75e89c013d8fff8bd8a6e520d184c3da1a4583 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 7 Aug 2014 09:47:01 +0200 Subject: [PATCH 0216/1868] mtd: spi-nor: remove duplicated w25q128 entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rafał Miłecki Acked-by: Huang Shijie Signed-off-by: Brian Norris --- drivers/mtd/spi-nor/spi-nor.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 4dc0c8662265..2fea172dd530 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -624,7 +624,6 @@ const struct spi_device_id spi_nor_ids[] = { { "w25q32dw", INFO(0xef6016, 0, 64 * 1024, 64, SECT_4K) }, { "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) }, { "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) }, - { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) }, { "w25q80", INFO(0xef5014, 0, 64 * 1024, 16, SECT_4K) }, { "w25q80bl", INFO(0xef4014, 0, 64 * 1024, 16, SECT_4K) }, { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) }, -- GitLab From 54ea17a597b00e46b3720e75dd7595cd5dfa5670 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 7 Aug 2014 09:47:02 +0200 Subject: [PATCH 0217/1868] mtd: spi-nor: drop jedec_probe /helper/ function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a one-liner doing no magic and its name may be confusing 
because it does not have to use JEDEC (e.g. when using alternative read_id). Signed-off-by: Rafał Miłecki Acked-by: Huang Shijie Signed-off-by: Brian Norris --- drivers/mtd/spi-nor/spi-nor.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 2fea172dd530..03e0ab8b2086 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -671,11 +671,6 @@ static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor) return ERR_PTR(-ENODEV); } -static const struct spi_device_id *jedec_probe(struct spi_nor *nor) -{ - return nor->read_id(nor); -} - static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { @@ -958,7 +953,7 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, if (info->jedec_id) { const struct spi_device_id *jid; - jid = jedec_probe(nor); + jid = nor->read_id(nor); if (IS_ERR(jid)) { return PTR_ERR(jid); } else if (jid != id) { -- GitLab From 06ed5c2bfacaf67039e87a213fa5d1cdde34246a Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Wed, 20 Aug 2014 16:02:59 +0200 Subject: [PATCH 0218/1868] kbuild: Make scripts executable The Makefiles call the respective interpreter explicitly, but this makes it easier to use the scripts manually. 
Signed-off-by: Michal Marek --- scripts/bootgraph.pl | 0 scripts/export_report.pl | 0 scripts/gcc-goto.sh | 0 scripts/gcc-ld | 0 scripts/gcc-version.sh | 0 scripts/gcc-x86_32-has-stack-protector.sh | 0 scripts/gcc-x86_64-has-stack-protector.sh | 0 scripts/gen_initramfs_list.sh | 0 scripts/headers_check.pl | 0 scripts/headers_install.sh | 0 scripts/kconfig/lxdialog/check-lxdialog.sh | 0 scripts/kconfig/streamline_config.pl | 0 scripts/link-vmlinux.sh | 0 scripts/markup_oops.pl | 0 scripts/mkmakefile | 0 scripts/mksysmap | 0 scripts/package/builddeb | 0 scripts/package/buildtar | 0 scripts/profile2linkerlist.pl | 0 scripts/rt-tester/rt-tester.py | 0 scripts/selinux/install_policy.sh | 0 scripts/tracing/draw_functrace.py | 0 scripts/xz_wrap.sh | 0 23 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/bootgraph.pl mode change 100644 => 100755 scripts/export_report.pl mode change 100644 => 100755 scripts/gcc-goto.sh mode change 100644 => 100755 scripts/gcc-ld mode change 100644 => 100755 scripts/gcc-version.sh mode change 100644 => 100755 scripts/gcc-x86_32-has-stack-protector.sh mode change 100644 => 100755 scripts/gcc-x86_64-has-stack-protector.sh mode change 100644 => 100755 scripts/gen_initramfs_list.sh mode change 100644 => 100755 scripts/headers_check.pl mode change 100644 => 100755 scripts/headers_install.sh mode change 100644 => 100755 scripts/kconfig/lxdialog/check-lxdialog.sh mode change 100644 => 100755 scripts/kconfig/streamline_config.pl mode change 100644 => 100755 scripts/link-vmlinux.sh mode change 100644 => 100755 scripts/markup_oops.pl mode change 100644 => 100755 scripts/mkmakefile mode change 100644 => 100755 scripts/mksysmap mode change 100644 => 100755 scripts/package/builddeb mode change 100644 => 100755 scripts/package/buildtar mode change 100644 => 100755 scripts/profile2linkerlist.pl mode change 100644 => 100755 scripts/rt-tester/rt-tester.py mode change 100644 => 100755 scripts/selinux/install_policy.sh mode 
change 100644 => 100755 scripts/tracing/draw_functrace.py mode change 100644 => 100755 scripts/xz_wrap.sh diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl old mode 100644 new mode 100755 diff --git a/scripts/export_report.pl b/scripts/export_report.pl old mode 100644 new mode 100755 diff --git a/scripts/gcc-goto.sh b/scripts/gcc-goto.sh old mode 100644 new mode 100755 diff --git a/scripts/gcc-ld b/scripts/gcc-ld old mode 100644 new mode 100755 diff --git a/scripts/gcc-version.sh b/scripts/gcc-version.sh old mode 100644 new mode 100755 diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh old mode 100644 new mode 100755 diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh old mode 100644 new mode 100755 diff --git a/scripts/gen_initramfs_list.sh b/scripts/gen_initramfs_list.sh old mode 100644 new mode 100755 diff --git a/scripts/headers_check.pl b/scripts/headers_check.pl old mode 100644 new mode 100755 diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh old mode 100644 new mode 100755 diff --git a/scripts/kconfig/lxdialog/check-lxdialog.sh b/scripts/kconfig/lxdialog/check-lxdialog.sh old mode 100644 new mode 100755 diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl old mode 100644 new mode 100755 diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh old mode 100644 new mode 100755 diff --git a/scripts/markup_oops.pl b/scripts/markup_oops.pl old mode 100644 new mode 100755 diff --git a/scripts/mkmakefile b/scripts/mkmakefile old mode 100644 new mode 100755 diff --git a/scripts/mksysmap b/scripts/mksysmap old mode 100644 new mode 100755 diff --git a/scripts/package/builddeb b/scripts/package/builddeb old mode 100644 new mode 100755 diff --git a/scripts/package/buildtar b/scripts/package/buildtar old mode 100644 new mode 100755 diff --git a/scripts/profile2linkerlist.pl b/scripts/profile2linkerlist.pl old 
mode 100644 new mode 100755 diff --git a/scripts/rt-tester/rt-tester.py b/scripts/rt-tester/rt-tester.py old mode 100644 new mode 100755 diff --git a/scripts/selinux/install_policy.sh b/scripts/selinux/install_policy.sh old mode 100644 new mode 100755 diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py old mode 100644 new mode 100755 diff --git a/scripts/xz_wrap.sh b/scripts/xz_wrap.sh old mode 100644 new mode 100755 -- GitLab From b7c71823f11158340b9d61325d3c44124650dc4e Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Fri, 15 Aug 2014 12:01:31 +0100 Subject: [PATCH 0219/1868] drm/i915/bdw: Don't write PDP in the legacy way when using LRCs This is mostly for correctness so that we know we are running the LR context correctly (this is, the PDPs are contained inside the context object). v2: Move the check to inside the enable PPGTT function. The switch happens in two places: the legacy context switch (that we won't hit when Execlists are enabled) and the PPGTT enable, which unfortunately we need. This would look much nicer if the ppgtt->enable was part of the ring init, where it logically belongs. v3: Move the check to the start of the enable PPGTT function. None of the legacy PPGTT enabling is required when using LRCs as the PPGTT is enabled in the context descriptor and the PDPs are written in the LRC. v4: Clarify comment based on review feedback. Signed-off-by: Oscar Mateo Signed-off-by: Thomas Daniel Reviewed-by: Damien Lespiau [danvet: Resolve conflicts with ppgtt_enable rework.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d97b280861ee..4db237065610 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -826,6 +826,12 @@ static void gen8_ppgtt_enable(struct drm_device *dev) struct intel_engine_cs *ring; int j; + /* In the case of execlists, PPGTT is enabled by the context descriptor + * and the PDPs are contained within the context itself. We don't + * need to do anything here. */ + if (i915.enable_execlists) + return; + for_each_ring(ring, dev_priv, j) { I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); -- GitLab From cc9130be805d955f0e06642e57741dd9df1fbc86 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:42 +0100 Subject: [PATCH 0220/1868] drm/i915/bdw: Make sure gpu reset still works with Execlists If we reset a ring after a hang, we have to make sure that we clear out all queued Execlists requests. v2: The ring is, at this point, already being correctly re-programmed for Execlists, and the hangcheck counters cleared. v3: Daniel suggests to drop the "if (execlists)" because the Execlists queue should be empty in legacy mode (which is true, if we do the INIT_LIST_HEAD). 
v4: Do the pending intel_runtime_pm_put Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 12 ++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 32fa1e9eb844..aa4103bdd352 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2551,6 +2551,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, i915_gem_free_request(request); } + while (!list_empty(&ring->execlist_queue)) { + struct intel_ctx_submit_request *submit_req; + + submit_req = list_first_entry(&ring->execlist_queue, + struct intel_ctx_submit_request, + execlist_link); + list_del(&submit_req->execlist_link); + intel_runtime_pm_put(dev_priv); + i915_gem_context_unreference(submit_req->ctx); + kfree(submit_req); + } + /* These may not have been flush before the reset, do so now */ kfree(ring->preallocated_lazy_request); ring->preallocated_lazy_request = NULL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 13543f8528c2..4fb1ec95ec08 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1612,6 +1612,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, ring->dev = dev; INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); + INIT_LIST_HEAD(&ring->execlist_queue); ringbuf->size = 32 * PAGE_SIZE; ringbuf->ring = ring; memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); -- GitLab From 71386ef9008817feebd863e46d8711ebe9e7cbbb Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:44 +0100 Subject: [PATCH 0221/1868] drm/i915/bdw: Disable semaphores for Execlists Up until recently, semaphores weren't enabled in BDW so we didn't care about them. 
But then Rodrigo came and enabled them: commit 521e62e49a42661a4ee0102644517dbe2f100a23 Author: Rodrigo Vivi drm/i915: Enable semaphores on BDW So now we have to explicitly disable them for Execlists until both features play nicely. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2f112853c36f..117f5c16df74 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -481,6 +481,10 @@ bool i915_semaphore_is_enabled(struct drm_device *dev) if (i915.semaphores >= 0) return i915.semaphores; + /* TODO: make semaphores and Execlists play nicely together */ + if (i915.enable_execlists) + return false; + /* Until we get further testing... */ if (IS_GEN8(dev)) return false; -- GitLab From 4ba70e448be91f52032595678c306e4aee2fae5c Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 7 Aug 2014 13:23:20 +0100 Subject: [PATCH 0222/1868] drm/i915/bdw: Display execlists info in debugfs v2: Warn and return if LRCs are not enabled. v3: Grab the Execlists spinlock (noticed by Daniel Vetter). Signed-off-by: Oscar Mateo v4: Lock the struct mutex for atomic state capture Signed-off-by: Thomas Daniel Reviewed-by: Damien Lespiau [danvet: Checkpatch.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 81 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 6 --- drivers/gpu/drm/i915/intel_lrc.h | 7 +++ 3 files changed, 88 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d42db6bc34e0..68335813ef4c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1721,6 +1721,86 @@ static int i915_context_status(struct seq_file *m, void *unused) return 0; } +static int i915_execlists(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + u32 status_pointer; + u8 read_pointer; + u8 write_pointer; + u32 status; + u32 ctx_id; + struct list_head *cursor; + int ring_id, i; + int ret; + + if (!i915.enable_execlists) { + seq_puts(m, "Logical Ring Contexts are disabled\n"); + return 0; + } + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + for_each_ring(ring, dev_priv, ring_id) { + struct intel_ctx_submit_request *head_req = NULL; + int count = 0; + unsigned long flags; + + seq_printf(m, "%s\n", ring->name); + + status = I915_READ(RING_EXECLIST_STATUS(ring)); + ctx_id = I915_READ(RING_EXECLIST_STATUS(ring) + 4); + seq_printf(m, "\tExeclist status: 0x%08X, context: %u\n", + status, ctx_id); + + status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring)); + seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer); + + read_pointer = ring->next_context_status_buffer; + write_pointer = status_pointer & 0x07; + if (read_pointer > write_pointer) + write_pointer += 6; + seq_printf(m, "\tRead pointer: 0x%08X, write pointer 0x%08X\n", + read_pointer, write_pointer); + + for (i = 0; i < 6; i++) { + status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + 8*i); + ctx_id = 
I915_READ(RING_CONTEXT_STATUS_BUF(ring) + 8*i + 4); + + seq_printf(m, "\tStatus buffer %d: 0x%08X, context: %u\n", + i, status, ctx_id); + } + + spin_lock_irqsave(&ring->execlist_lock, flags); + list_for_each(cursor, &ring->execlist_queue) + count++; + head_req = list_first_entry_or_null(&ring->execlist_queue, + struct intel_ctx_submit_request, execlist_link); + spin_unlock_irqrestore(&ring->execlist_lock, flags); + + seq_printf(m, "\t%d requests in queue\n", count); + if (head_req) { + struct drm_i915_gem_object *ctx_obj; + + ctx_obj = head_req->ctx->engine[ring_id].state; + seq_printf(m, "\tHead request id: %u\n", + intel_execlists_ctx_id(ctx_obj)); + seq_printf(m, "\tHead request tail: %u\n", + head_req->tail); + } + + seq_putc(m, '\n'); + } + + mutex_unlock(&dev->struct_mutex); + + return 0; +} + static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; @@ -3974,6 +4054,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_opregion", i915_opregion, 0}, {"i915_gem_framebuffer", i915_gem_framebuffer_info, 0}, {"i915_context_status", i915_context_status, 0}, + {"i915_execlists", i915_execlists, 0}, {"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0}, {"i915_swizzle_info", i915_swizzle_info, 0}, {"i915_ppgtt_info", i915_ppgtt_info, 0}, diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6f6c5a931faf..cc923a96fa4c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -46,12 +46,6 @@ #define GEN8_LR_CONTEXT_ALIGN 4096 -#define RING_ELSP(ring) ((ring)->mmio_base+0x230) -#define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) -#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) -#define RING_CONTEXT_STATUS_BUF(ring) ((ring)->mmio_base+0x370) -#define RING_CONTEXT_STATUS_PTR(ring) ((ring)->mmio_base+0x3a0) - #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) #define 
RING_EXECLIST0_VALID (1 << 0x4) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 331c6c2ba376..117d1a4eb3b9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -24,6 +24,13 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ +/* Execlists regs */ +#define RING_ELSP(ring) ((ring)->mmio_base+0x230) +#define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) +#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) +#define RING_CONTEXT_STATUS_BUF(ring) ((ring)->mmio_base+0x370) +#define RING_CONTEXT_STATUS_PTR(ring) ((ring)->mmio_base+0x3a0) + /* Logical Rings */ void intel_logical_ring_stop(struct intel_engine_cs *ring); void intel_logical_ring_cleanup(struct intel_engine_cs *ring); -- GitLab From c9fe99bd4c4f8730207fed5e863d8f25224fd20b Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:46 +0100 Subject: [PATCH 0223/1868] drm/i915/bdw: Display context backing obj & ringbuffer info in debugfs Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch.] 
Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 37 +++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 68335813ef4c..4f279cfe67b8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1677,6 +1677,14 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) return 0; } +static void describe_ctx_ringbuf(struct seq_file *m, + struct intel_ringbuffer *ringbuf) +{ + seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)", + ringbuf->space, ringbuf->head, ringbuf->tail, + ringbuf->last_retired_head); +} + static int i915_context_status(struct seq_file *m, void *unused) { struct drm_info_node *node = m->private; @@ -1703,16 +1711,37 @@ static int i915_context_status(struct seq_file *m, void *unused) } list_for_each_entry(ctx, &dev_priv->context_list, link) { - if (ctx->legacy_hw_ctx.rcs_state == NULL) + if (!i915.enable_execlists && + ctx->legacy_hw_ctx.rcs_state == NULL) continue; seq_puts(m, "HW context "); describe_ctx(m, ctx); - for_each_ring(ring, dev_priv, i) + for_each_ring(ring, dev_priv, i) { if (ring->default_context == ctx) - seq_printf(m, "(default context %s) ", ring->name); + seq_printf(m, "(default context %s) ", + ring->name); + } + + if (i915.enable_execlists) { + seq_putc(m, '\n'); + for_each_ring(ring, dev_priv, i) { + struct drm_i915_gem_object *ctx_obj = + ctx->engine[i].state; + struct intel_ringbuffer *ringbuf = + ctx->engine[i].ringbuf; + + seq_printf(m, "%s: ", ring->name); + if (ctx_obj) + describe_obj(m, ctx_obj); + if (ringbuf) + describe_ctx_ringbuf(m, ringbuf); + seq_putc(m, '\n'); + } + } else { + describe_obj(m, ctx->legacy_hw_ctx.rcs_state); + } - describe_obj(m, ctx->legacy_hw_ctx.rcs_state); seq_putc(m, '\n'); } -- GitLab From c0ab1ae9028f14bcb7bfb655bd2120c60681c479 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 7 
Aug 2014 13:24:26 +0100 Subject: [PATCH 0224/1868] drm/i915/bdw: Print context state in debugfs This has turned out to be really handy in debug so far. Update: Since writing this patch, I've gotten similar code upstream for error state. I've used it quite a bit in debugfs however, and I'd like to keep it here at least until preemption is working. Signed-off-by: Ben Widawsky This patch was accidentally dropped in the first Execlists version, and it has been very useful indeed. Put it back again, but as a standalone debugfs file. Signed-off-by: Oscar Mateo v2: Take the device struct_mutex rather than mode_config mutex for atomic state capture. Signed-off-by: Thomas Daniel Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 52 +++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4f279cfe67b8..6c82bdaa0822 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1750,6 +1750,57 @@ static int i915_context_status(struct seq_file *m, void *unused) return 0; } +static int i915_dump_lrc(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + struct intel_context *ctx; + int ret, i; + + if (!i915.enable_execlists) { + seq_printf(m, "Logical Ring Contexts are disabled\n"); + return 0; + } + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + list_for_each_entry(ctx, &dev_priv->context_list, link) { + for_each_ring(ring, dev_priv, i) { + struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; + + if (ring->default_context == ctx) + continue; + + if (ctx_obj) { + struct page *page = i915_gem_object_get_page(ctx_obj, 1); + uint32_t *reg_state = kmap_atomic(page); + int j; + + 
seq_printf(m, "CONTEXT: %s %u\n", ring->name, + intel_execlists_ctx_id(ctx_obj)); + + for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) { + seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n", + i915_gem_obj_ggtt_offset(ctx_obj) + 4096 + (j * 4), + reg_state[j], reg_state[j + 1], + reg_state[j + 2], reg_state[j + 3]); + } + kunmap_atomic(reg_state); + + seq_putc(m, '\n'); + } + } + } + + mutex_unlock(&dev->struct_mutex); + + return 0; +} + static int i915_execlists(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *)m->private; @@ -4083,6 +4134,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_opregion", i915_opregion, 0}, {"i915_gem_framebuffer", i915_gem_framebuffer_info, 0}, {"i915_context_status", i915_context_status, 0}, + {"i915_dump_lrc", i915_dump_lrc, 0}, {"i915_execlists", i915_execlists, 0}, {"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0}, {"i915_swizzle_info", i915_swizzle_info, 0}, -- GitLab From 73e4d07f8ae9cff8c869d73df4e299a3a6f5ad98 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:48 +0100 Subject: [PATCH 0225/1868] drm/i915/bdw: Document Logical Rings, LR contexts and Execlists Add theory of operation notes to intel_lrc.c and comments to externally visible functions. v2: Add notes on logical ring context creation. v3: Use kerneldoc. v4: Integrate it in the DocBook template. Signed-off-by: Thomas Daniel (v1) Signed-off-by: Oscar Mateo (v2, v3) Reviewed-by: Damien Lespiau [danvet: Drop hunk about render ring init function since that's not yet merged.] 
Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 5 + drivers/gpu/drm/i915/intel_lrc.c | 203 ++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_lrc.h | 30 +++++ 3 files changed, 237 insertions(+), 1 deletion(-) diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 972759489376..689e3e38b9c3 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3919,6 +3919,11 @@ int num_ioctls; !Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser !Idrivers/gpu/drm/i915/i915_cmd_parser.c + + Logical Rings, Logical Ring Contexts and Execlists +!Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and Execlists +!Idrivers/gpu/drm/i915/intel_lrc.c + diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index cc923a96fa4c..c096b9b7f22a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -28,13 +28,108 @@ * */ -/* +/** + * DOC: Logical Rings, Logical Ring Contexts and Execlists + * + * Motivation: * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". * These expanded contexts enable a number of new abilities, especially * "Execlists" (also implemented in this file). * + * One of the main differences with the legacy HW contexts is that logical + * ring contexts incorporate many more things to the context's state, like + * PDPs or ringbuffer control registers: + * + * The reason why PDPs are included in the context is straightforward: as + * PPGTTs (per-process GTTs) are actually per-context, having the PDPs + * contained there mean you don't need to do a ppgtt->switch_mm yourself, + * instead, the GPU will do it for you on the context switch. + * + * But, what about the ringbuffer control registers (head, tail, etc..)? + * shouldn't we just need a set of those per engine command streamer? 
This is + * where the name "Logical Rings" starts to make sense: by virtualizing the + * rings, the engine cs shifts to a new "ring buffer" with every context + * switch. When you want to submit a workload to the GPU you: A) choose your + * context, B) find its appropriate virtualized ring, C) write commands to it + * and then, finally, D) tell the GPU to switch to that context. + * + * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch + * to a context is via a context execution list, ergo "Execlists". + * + * LRC implementation: + * Regarding the creation of contexts, we have: + * + * - One global default context. + * - One local default context for each opened fd. + * - One local extra context for each context create ioctl call. + * + * Now that ringbuffers belong per-context (and not per-engine, like before) + * and that contexts are uniquely tied to a given engine (and not reusable, + * like before) we need: + * + * - One ringbuffer per-engine inside each context. + * - One backing object per-engine inside each context. + * + * The global default context starts its life with these new objects fully + * allocated and populated. The local default context for each opened fd is + * more complex, because we don't know at creation time which engine is going + * to use them. To handle this, we have implemented a deferred creation of LR + * contexts: + * + * The local context starts its life as a hollow or blank holder, that only + * gets populated for a given engine once we receive an execbuffer. If later + * on we receive another execbuffer ioctl for the same context but a different + * engine, we allocate/populate a new ringbuffer and context backing object and + * so on. + * + * Finally, regarding local contexts created using the ioctl call: as they are + * only allowed with the render ring, we can allocate & populate them right + * away (no need to defer anything, at least for now).
+ * + * Execlists implementation: * Execlists are the new method by which, on gen8+ hardware, workloads are * submitted for execution (as opposed to the legacy, ringbuffer-based, method). + * This method works as follows: + * + * When a request is committed, its commands (the BB start and any leading or + * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer + * for the appropriate context. The tail pointer in the hardware context is not + * updated at this time, but instead, kept by the driver in the ringbuffer + * structure. A structure representing this request is added to a request queue + * for the appropriate engine: this structure contains a copy of the context's + * tail after the request was written to the ring buffer and a pointer to the + * context itself. + * + * If the engine's request queue was empty before the request was added, the + * queue is processed immediately. Otherwise the queue will be processed during + * a context switch interrupt. In any case, elements on the queue will get sent + * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a + * globally unique 20-bits submission ID. + * + * When execution of a request completes, the GPU updates the context status + * buffer with a context complete event and generates a context switch interrupt. + * During the interrupt handling, the driver examines the events in the buffer: + * for each context complete event, if the announced ID matches that on the head + * of the request queue, then that request is retired and removed from the queue. + * + * After processing, if any requests were retired and the queue is not empty + * then a new execution list can be submitted. The two requests at the front of + * the queue are next to be submitted but since a context may not occur twice in + * an execution list, if subsequent requests have the same ID as the first then + * the two requests must be combined. 
This is done simply by discarding requests + * at the head of the queue until either only one request is left (in which case + * we use a NULL second context) or the first two requests have unique IDs. + * + * By always executing the first two requests in the queue the driver ensures + * that the GPU is kept as busy as possible. In the case where a single context + * completes but a second context is still executing, the request for this second + * context will be at the head of the queue when we remove the first one. This + * request will then be resubmitted along with a new request for a different context, + * which will cause the hardware to continue executing the second request and queue + * the new request (the GPU detects the condition of a context getting preempted + * with the same context and optimizes the context switch flow by not doing + * preemption, but just sampling the new tail pointer). + * */ #include @@ -109,6 +204,17 @@ enum { }; #define GEN8_CTX_ID_SHIFT 32 +/** + * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists + * @dev: DRM device. + * @enable_execlists: value of i915.enable_execlists module parameter. + * + * Only certain platforms support Execlists (the prerequisites being + * support for Logical Ring Contexts and Aliasing PPGTT or better), + * and only when enabled via module parameter. + * + * Return: 1 if Execlists is supported and has to be enabled. + */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { WARN_ON(i915.enable_ppgtt == -1); @@ -123,6 +229,18 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +/** + * intel_execlists_ctx_id() - get the Execlists Context ID + * @ctx_obj: Logical Ring Context backing object. + * + * Do not confuse with ctx->id!
Unfortunately we have a name overload + * here: the old context ID we pass to userspace as a handle so that + * they can refer to a context, and the new context ID we pass to the + * ELSP so that the GPU can inform us of the context status via + * interrupts. + * + * Return: 20-bit globally unique context ID. + */ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) { u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj); @@ -313,6 +431,13 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, return false; } +/** + * intel_execlists_handle_ctx_events() - handle Context Switch interrupts + * @ring: Engine Command Streamer to handle. + * + * Check the unread Context Status Buffers and manage the submission of new + * contexts to the ELSP accordingly. + */ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring->dev->dev_private; @@ -481,6 +606,23 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, return logical_ring_invalidate_all_caches(ringbuf); } +/** + * intel_execlists_submission() - submit a batchbuffer for execution, Execlists style + * @dev: DRM device. + * @file: DRM file. + * @ring: Engine Command Streamer to submit to. + * @ctx: Context to employ for this submission. + * @args: execbuffer call arguments. + * @vmas: list of vmas. + * @batch_obj: the batchbuffer to submit. + * @exec_start: batchbuffer start virtual address pointer. + * @flags: translated execbuffer call flags. + * + * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts + * away the submission details of the execbuffer ioctl call. + * + * Return: non-zero if the submission fails.
+ */ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, struct intel_context *ctx, @@ -608,6 +750,15 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) return 0; } +/** + * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload + * @ringbuf: Logical Ringbuffer to advance. + * + * The tail is updated in our logical ringbuffer struct, not in the actual context. What + * really happens during submission is that the context and current tail will be placed + * on a queue waiting for the ELSP to be ready to accept a new context submission. At that + * point, the tail *inside* the context is updated and the ELSP written to. + */ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) { struct intel_engine_cs *ring = ringbuf->ring; @@ -781,6 +932,19 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) return 0; } +/** + * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands + * + * @ringbuf: Logical ringbuffer. + * @num_dwords: number of DWORDs that we plan to write to the ringbuffer. + * + * The ringbuffer might not be ready to accept the commands right away (maybe it needs to + * be wrapped, or wait a bit for the tail to be updated). This function takes care of that + * and also preallocates a request (every workload submission is still mediated through + * requests, same as it did with legacy ringbuffer submission). + * + * Return: non-zero if the ringbuffer is not ready to be written to. + */ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) { struct intel_engine_cs *ring = ringbuf->ring; @@ -1021,6 +1185,12 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) return 0; } +/** + * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer + * + * @ring: Engine Command Streamer. 
+ * + */ void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring->dev->dev_private; @@ -1215,6 +1385,16 @@ static int logical_vebox_ring_init(struct drm_device *dev) return logical_ring_init(dev, ring); } +/** + * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers + * @dev: DRM device. + * + * This function inits the engines for an Execlists submission style (the equivalent in the + * legacy ringbuffer submission world would be i915_gem_init_rings). It does it only for + * those engines that are present in the hardware. + * + * Return: non-zero if the initialization failed. + */ int intel_logical_rings_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -1377,6 +1557,14 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o return 0; } +/** + * intel_lr_context_free() - free the LRC specific bits of a context + * @ctx: the LR context to free. + * + * The real context freeing is done in i915_gem_context_free: this only + * takes care of the bits that are LRC related: the per-engine backing + * objects and the logical ringbuffer. + */ void intel_lr_context_free(struct intel_context *ctx) { int i; @@ -1415,6 +1603,19 @@ static uint32_t get_lr_context_size(struct intel_engine_cs *ring) return ret; } +/** + * intel_lr_context_deferred_create() - create the LRC specific bits of a context + * @ctx: LR context to create. + * @ring: engine to be used with the context. + * + * This function can be called more than once, with different engines, if we plan + * to use the context with them. The context backing objects and the ringbuffers + * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why + * the creation is a deferred call: it's better to make sure first that we need to use + * a given ring with the context. + * + * Return: non-zero on eror. 
+ */ int intel_lr_context_deferred_create(struct intel_context *ctx, struct intel_engine_cs *ring) { diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 117d1a4eb3b9..991d4499fb03 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -38,10 +38,21 @@ int intel_logical_rings_init(struct drm_device *dev); int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf); void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); +/** + * intel_logical_ring_advance() - advance the ringbuffer tail + * @ringbuf: Ringbuffer to advance. + * + * The tail is only updated in our logical ringbuffer struct. + */ static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) { ringbuf->tail &= ringbuf->size - 1; } +/** + * intel_logical_ring_emit() - write a DWORD to the ringbuffer. + * @ringbuf: Ringbuffer to write to. + * @data: DWORD to write. + */ static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, u32 data) { @@ -66,6 +77,25 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, u64 exec_start, u32 flags); u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj); +/** + * struct intel_ctx_submit_request - queued context submission request + * @ctx: Context to submit to the ELSP. + * @ring: Engine to submit it to. + * @tail: how far in the context's ringbuffer this request goes to. + * @execlist_link: link in the submission queue. + * @work: workqueue for processing this request in a bottom half. + * @elsp_submitted: no. of times this request has been sent to the ELSP. + * + * The ELSP only accepts two elements at a time, so we queue context/tail + * pairs on a given queue (ring->execlist_queue) until the hardware is + * available. 
The queue serves a double purpose: we also use it to keep track + * of the up to 2 contexts currently in the hardware (usually one in execution + * and the other queued up by the GPU): We only remove elements from the head + * of the queue when the hardware informs us that an element has been + * completed. + * + * All accesses to the queue are mediated by a spinlock (ring->execlist_lock). + */ struct intel_ctx_submit_request { struct intel_context *ctx; struct intel_engine_cs *ring; -- GitLab From d7f621e50704306c348ccb192f17047f1499f9bc Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:49 +0100 Subject: [PATCH 0226/1868] drm/i915/bdw: Enable Logical Ring Contexts (hence, Execlists) The time has come, the Walrus said, to talk of many things. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ed52ac744105..d4d2abbb8e6c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2088,7 +2088,7 @@ struct drm_i915_cmd_table { #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) -#define HAS_LOGICAL_RING_CONTEXTS(dev) 0 +#define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 8) #define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_PPGTT(dev) (INTEL_INFO(dev)->gen >= 7 && !IS_GEN8(dev)) #define USES_PPGTT(dev) (i915.enable_ppgtt) -- GitLab From fd639ac6dcbcbae4f2131bf1390a032df659ffb7 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 15 Aug 2014 16:48:36 +0100 Subject: [PATCH 0227/1868] drm/i915/bdw: Disable execlists by default We still have a few missing bits and pieces to have execlists enabled by default eg. 
the error capture or the render state initialization and so it wouldn't be wise to enable it by default on BDW just yet. Cc: Daniel Vetter Cc: Thomas Daniel Signed-off-by: Damien Lespiau Reviewed-by: Paulo Zanoni Tested-by: Paulo Zanoni Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82740 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index f7f8350c3793..1dcb1bed5ef8 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -35,7 +35,7 @@ struct i915_params i915 __read_mostly = { .vbt_sdvo_panel_type = -1, .enable_rc6 = -1, .enable_fbc = -1, - .enable_execlists = -1, + .enable_execlists = 0, .enable_hangcheck = true, .enable_ppgtt = -1, .enable_psr = 1, @@ -122,7 +122,7 @@ MODULE_PARM_DESC(enable_ppgtt, module_param_named(enable_execlists, i915.enable_execlists, int, 0400); MODULE_PARM_DESC(enable_execlists, "Override execlists usage. " - "(-1=auto [default], 0=disabled, 1=enabled)"); + "(-1=auto, 0=disabled [default], 1=enabled)"); module_param_named(enable_psr, i915.enable_psr, int, 0600); MODULE_PARM_DESC(enable_psr, "Enable PSR (default: true)"); -- GitLab From 3a5f87c286515c54ff5c52c3e64d0c522b7570c0 Mon Sep 17 00:00:00 2001 From: Thomas Wood Date: Wed, 20 Aug 2014 14:45:00 +0100 Subject: [PATCH 0228/1868] drm: fix plane rotation when restoring fbdev configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure plane rotation is reset correctly when restoring the fbdev configuration by using drm_mode_plane_set_obj_prop which calls the driver's set_property callback. The rotation reset feature was introduced in commit 9783de2 (drm: Resetting rotation property) and the callback issue was originally addressed in a previous version of the patch, but the fix was not present in the final version. 
v2: Fix documentation warning Add some more details to the commit message (Daniel Vetter) Testcase: igt/kms_rotation_crc Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82236 Cc: Sonika Jindal Cc: Ville Syrjälä Cc: Dave Airlie Cc: Daniel Vetter Signed-off-by: Thomas Wood Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 25 ++++++++++++++++++++----- drivers/gpu/drm/drm_fb_helper.c | 6 +++--- include/drm/drm_crtc.h | 3 +++ 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 3c4a62169f28..d7e4c0e2e796 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -4156,12 +4156,25 @@ static int drm_mode_crtc_set_obj_prop(struct drm_mode_object *obj, return ret; } -static int drm_mode_plane_set_obj_prop(struct drm_mode_object *obj, - struct drm_property *property, - uint64_t value) +/** + * drm_mode_plane_set_obj_prop - set the value of a property + * @plane: drm plane object to set property value for + * @property: property to set + * @value: value the property should be set to + * + * This functions sets a given property on a given plane object. This function + * calls the driver's ->set_property callback and changes the software state of + * the property if the callback succeeds. + * + * Returns: + * Zero on success, error code on failure. 
+ */ +int drm_mode_plane_set_obj_prop(struct drm_plane *plane, + struct drm_property *property, + uint64_t value) { int ret = -EINVAL; - struct drm_plane *plane = obj_to_plane(obj); + struct drm_mode_object *obj = &plane->base; if (plane->funcs->set_property) ret = plane->funcs->set_property(plane, property, value); @@ -4170,6 +4183,7 @@ static int drm_mode_plane_set_obj_prop(struct drm_mode_object *obj, return ret; } +EXPORT_SYMBOL(drm_mode_plane_set_obj_prop); /** * drm_mode_getproperty_ioctl - get the current value of a object's property @@ -4308,7 +4322,8 @@ int drm_mode_obj_set_property_ioctl(struct drm_device *dev, void *data, ret = drm_mode_crtc_set_obj_prop(arg_obj, property, arg->value); break; case DRM_MODE_OBJECT_PLANE: - ret = drm_mode_plane_set_obj_prop(arg_obj, property, arg->value); + ret = drm_mode_plane_set_obj_prop(obj_to_plane(arg_obj), + property, arg->value); break; } diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index d139eddb3d61..99569ee5adee 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -350,9 +350,9 @@ static bool restore_fbdev_mode(struct drm_fb_helper *fb_helper) drm_plane_force_disable(plane); if (dev->mode_config.rotation_property) { - drm_object_property_set_value(&plane->base, - dev->mode_config.rotation_property, - BIT(DRM_ROTATE_0)); + drm_mode_plane_set_obj_prop(plane, + dev->mode_config.rotation_property, + BIT(DRM_ROTATE_0)); } } diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 62f73bdbcc47..38fae5d9ad73 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -1121,6 +1121,9 @@ extern int drm_mode_obj_get_properties_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int drm_mode_obj_set_property_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int drm_mode_plane_set_obj_prop(struct drm_plane *plane, + struct drm_property *property, + uint64_t value); extern void 
drm_fb_get_bpp_depth(uint32_t format, unsigned int *depth, int *bpp); -- GitLab From c40724d3f38122b8ae06367a425a63c24988c10f Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 4 Jun 2014 00:52:30 -0700 Subject: [PATCH 0229/1868] kconfig: lxdialog: fix spelling Signed-off-by: Brian Norris Cc: "Yann E. MORIN" Signed-off-by: Michal Marek --- scripts/kconfig/lxdialog/dialog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/lxdialog/dialog.h b/scripts/kconfig/lxdialog/dialog.h index b4343d384926..fcffd5b41fb0 100644 --- a/scripts/kconfig/lxdialog/dialog.h +++ b/scripts/kconfig/lxdialog/dialog.h @@ -170,7 +170,7 @@ char item_tag(void); /* item list manipulation for lxdialog use */ #define MAXITEMSTR 200 struct dialog_item { - char str[MAXITEMSTR]; /* promtp displayed */ + char str[MAXITEMSTR]; /* prompt displayed */ char tag; void *data; /* pointer to menu item - used by menubox+checklist */ int selected; /* Set to 1 by dialog_*() function if selected. */ -- GitLab From 7285996aa0006d671bb01f0d35991d254b2b2b01 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 4 Jun 2014 00:52:31 -0700 Subject: [PATCH 0230/1868] kconfig: nconfig: fix multi-byte UTF handling Currently, Kconfig descriptions that use multi-byte UTF-8 characters (such as MTD_NAND_CAFE) will have their menu entries dropped from the 'make nconfig' ncurses menu, and all subsequent entries in the same window will be omitted. This seems to be due to the ncurses 'menu' library, which does not traditionally handle UTF-8 >8-bit characters properly. The ncursesw library ('w' is for "wide") is written to handle these UTF-8 characters, and is practically a drop-in replacement at the source level. Use it by default, if available. Link: https://bugzilla.kernel.org/show_bug.cgi?id=43067 Signed-off-by: Brian Norris Cc: "Yann E. 
MORIN" Cc: Martin Walch Acked-by: Sam Ravnborg Signed-off-by: Michal Marek --- scripts/kconfig/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index e7bf38e92007..c05938555225 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -191,7 +191,8 @@ HOSTCFLAGS_gconf.o = `pkg-config --cflags gtk+-2.0 gmodule-2.0 libglade-2.0` \ HOSTLOADLIBES_mconf = $(shell $(CONFIG_SHELL) $(check-lxdialog) -ldflags $(HOSTCC)) HOSTLOADLIBES_nconf = $(shell \ - pkg-config --libs menu panel ncurses 2>/dev/null \ + pkg-config --libs menuw panelw ncursesw 2>/dev/null \ + || pkg-config --libs menu panel ncurses 2>/dev/null \ || echo "-lmenu -lpanel -lncurses" ) $(obj)/qconf.o: $(obj)/.tmp_qtcheck -- GitLab From c28135481428d0674fcc1da0740ed3f4343df5b2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 22 Aug 2014 22:39:37 +0200 Subject: [PATCH 0231/1868] drm/i915: Update DRIVER_DATE to 20140822 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d4d2abbb8e6c..d309725a7d7b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -54,7 +54,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20140808" +#define DRIVER_DATE "20140822" enum pipe { INVALID_PIPE = -1, -- GitLab From 50917e265ebd2ea33bc34b22b3981f1e88415eae Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Aug 2014 16:38:38 +0200 Subject: [PATCH 0232/1868] iommu/amd: Move struct iommu_dev_data to amd_iommu.c The struct is only used there, so it doesn't need to be in the header file. 
Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 21 +++++++++++++++++++++ drivers/iommu/amd_iommu_types.h | 21 --------------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index ecb0109a5360..fe6d7cc715f7 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -87,6 +87,27 @@ int amd_iommu_max_glx_val = -1; static struct dma_map_ops amd_iommu_dma_ops; +/* + * This struct contains device specific data for the IOMMU + */ +struct iommu_dev_data { + struct list_head list; /* For domain->dev_list */ + struct list_head dev_data_list; /* For global dev_data_list */ + struct iommu_dev_data *alias_data;/* The alias dev_data */ + struct protection_domain *domain; /* Domain the device is bound to */ + atomic_t bind; /* Domain attach reference count */ + u16 devid; /* PCI Device ID */ + bool iommu_v2; /* Device can make use of IOMMUv2 */ + bool passthrough; /* Default for device is pt_domain */ + struct { + bool enabled; + int qdep; + } ats; /* ATS state */ + bool pri_tlp; /* PASID TLB required for + PPR completions */ + u32 errata; /* Bitmap for errata to apply */ +}; + /* * general struct to manage commands send to an IOMMU */ diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 8e43b7cba133..cec51a8ba844 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -417,27 +417,6 @@ struct protection_domain { }; -/* - * This struct contains device specific data for the IOMMU - */ -struct iommu_dev_data { - struct list_head list; /* For domain->dev_list */ - struct list_head dev_data_list; /* For global dev_data_list */ - struct iommu_dev_data *alias_data;/* The alias dev_data */ - struct protection_domain *domain; /* Domain the device is bound to */ - atomic_t bind; /* Domain attach reference count */ - u16 devid; /* PCI Device ID */ - bool iommu_v2; /* Device can make use of IOMMUv2 */ - bool passthrough; /* 
Default for device is pt_domain */ - struct { - bool enabled; - int qdep; - } ats; /* ATS state */ - bool pri_tlp; /* PASID TLB required for - PPR completions */ - u32 errata; /* Bitmap for errata to apply */ -}; - /* * For dynamic growth the aperture size is split into ranges of 128MB of * DMA address space each. This struct represents one such range. -- GitLab From f251e187f2949c690fc13a91df7b873a6b5be671 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Aug 2014 16:48:10 +0200 Subject: [PATCH 0233/1868] iommu/amd: Keep a list of devices in an alias group Some broken devices might use any request-id from the alias group, so we need to set a DTE entry for every device in there. This patch adds creation of those lists. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index fe6d7cc715f7..8a152564a098 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -93,6 +93,7 @@ static struct dma_map_ops amd_iommu_dma_ops; struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ struct list_head dev_data_list; /* For global dev_data_list */ + struct list_head alias_list; /* Link alias-groups together */ struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reference count */ @@ -135,6 +136,8 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) if (!dev_data) return NULL; + INIT_LIST_HEAD(&dev_data->alias_list); + dev_data->devid = devid; atomic_set(&dev_data->bind, 0); @@ -383,6 +386,9 @@ static int iommu_init_device(struct device *dev) return -ENOTSUPP; } dev_data->alias_data = alias_data; + + /* Add device to the alias_list */ + list_add(&dev_data->alias_list, &alias_data->alias_list); } ret = init_iommu_group(dev); -- GitLab From 397111abaaac259afcc48cd2fbfb78f63f27e797 Mon Sep 17 00:00:00 2001 From: 
Joerg Roedel Date: Tue, 5 Aug 2014 17:31:51 +0200 Subject: [PATCH 0234/1868] iommu/amd: Attach and detach complete alias group Change the device attach and detach semantic to apply to all devices in an alias group. This means all devices in an alias group are now attached and detached at the same time. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 46 +++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 8a152564a098..a5e6b0a2de16 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2149,35 +2149,29 @@ static void do_detach(struct iommu_dev_data *dev_data) static int __attach_device(struct iommu_dev_data *dev_data, struct protection_domain *domain) { + struct iommu_dev_data *head, *entry; int ret; /* lock domain */ spin_lock(&domain->lock); - if (dev_data->alias_data != NULL) { - struct iommu_dev_data *alias_data = dev_data->alias_data; - - /* Some sanity checks */ - ret = -EBUSY; - if (alias_data->domain != NULL && - alias_data->domain != domain) - goto out_unlock; + head = dev_data; - if (dev_data->domain != NULL && - dev_data->domain != domain) - goto out_unlock; + if (head->alias_data != NULL) + head = head->alias_data; - /* Do real assignment */ - if (alias_data->domain == NULL) - do_attach(alias_data, domain); + /* Now we have the root of the alias group, if any */ - atomic_inc(&alias_data->bind); - } + ret = -EBUSY; + if (head->domain != NULL) + goto out_unlock; - if (dev_data->domain == NULL) - do_attach(dev_data, domain); + /* Attach alias group root */ + do_attach(head, domain); - atomic_inc(&dev_data->bind); + /* Attach other devices in the alias group */ + list_for_each_entry(entry, &head->alias_list, alias_list) + do_attach(entry, domain); ret = 0; @@ -2325,6 +2319,7 @@ static int attach_device(struct device *dev, */ static void __detach_device(struct iommu_dev_data *dev_data) { + struct iommu_dev_data *head,
*entry; struct protection_domain *domain; unsigned long flags; @@ -2334,15 +2329,14 @@ static void __detach_device(struct iommu_dev_data *dev_data) spin_lock_irqsave(&domain->lock, flags); - if (dev_data->alias_data != NULL) { - struct iommu_dev_data *alias_data = dev_data->alias_data; + head = dev_data; + if (head->alias_data != NULL) + head = head->alias_data; - if (atomic_dec_and_test(&alias_data->bind)) - do_detach(alias_data); - } + list_for_each_entry(entry, &head->alias_list, alias_list) + do_detach(entry); - if (atomic_dec_and_test(&dev_data->bind)) - do_detach(dev_data); + do_detach(head); spin_unlock_irqrestore(&domain->lock, flags); -- GitLab From cafd2545cfd36ef6fee14f9c36870b81d3c98de5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Aug 2014 17:34:25 +0200 Subject: [PATCH 0235/1868] iommu/amd: Remove device binding reference count This reference count is not used anymore, as all devices in an alias group are now attached and detached together. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index a5e6b0a2de16..989c1ae03979 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -96,7 +96,6 @@ struct iommu_dev_data { struct list_head alias_list; /* Link alias-groups together */ struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ - atomic_t bind; /* Domain attach reference count */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Default for device is pt_domain */ @@ -139,7 +138,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) INIT_LIST_HEAD(&dev_data->alias_list); dev_data->devid = devid; - atomic_set(&dev_data->bind, 0); spin_lock_irqsave(&dev_data_list_lock, flags); list_add_tail(&dev_data->dev_data_list, &dev_data_list); @@ -3179,7 +3177,6 @@ static void 
cleanup_domain(struct protection_domain *domain) entry = list_first_entry(&domain->dev_list, struct iommu_dev_data, list); __detach_device(entry); - atomic_set(&entry->bind, 0); } write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); -- GitLab From 604effb782a8a4d9a20c8af16bcbf86d742db119 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 26 Aug 2014 13:26:56 +0300 Subject: [PATCH 0236/1868] drm/i915: fix suspend/resume for GENs w/o runtime PM support Before sharing common parts between the system and runtime s/r handlers we WARNed if the runtime s/r handlers were called on GENs that didn't support RPM. But this WARN is not correct if the same handler is called from the system s/r path, since that can happen on any platform. This also broke system s/r on old platforms. The issue was introduced in commit 016970beb05da6285c2f3ed2bee1c676cb75972e Author: Sagar Kamble Date: Wed Aug 13 23:07:06 2014 +0530 v2: - remove the WARN and depend on the HAS_RUNTIME_PM check in runtime_suspend/resume instead (Daniel) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82751 Signed-off-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 117f5c16df74..0f7a522682a5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1373,7 +1373,9 @@ static int intel_runtime_suspend(struct device *device) if (WARN_ON_ONCE(!(dev_priv->rps.enabled && intel_enable_rc6(dev)))) return -ENODEV; - WARN_ON(!HAS_RUNTIME_PM(dev)); + if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev))) + return -ENODEV; + assert_force_wake_inactive(dev_priv); DRM_DEBUG_KMS("Suspending device\n"); @@ -1441,7 +1443,8 @@ static int intel_runtime_resume(struct device *device) struct drm_i915_private *dev_priv = dev->dev_private; int ret; - WARN_ON(!HAS_RUNTIME_PM(dev)); + if
(WARN_ON_ONCE(!HAS_RUNTIME_PM(dev))) + return -ENODEV; DRM_DEBUG_KMS("Resuming device\n"); @@ -1476,16 +1479,12 @@ static int intel_suspend_complete(struct drm_i915_private *dev_priv) struct drm_device *dev = dev_priv->dev; int ret; - if (IS_GEN6(dev)) { - ret = 0; - } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) ret = hsw_suspend_complete(dev_priv); - } else if (IS_VALLEYVIEW(dev)) { + else if (IS_VALLEYVIEW(dev)) ret = vlv_suspend_complete(dev_priv); - } else { - ret = -ENODEV; - WARN_ON(1); - } + else + ret = 0; return ret; } @@ -1501,16 +1500,14 @@ static int intel_resume_prepare(struct drm_i915_private *dev_priv, struct drm_device *dev = dev_priv->dev; int ret; - if (IS_GEN6(dev)) { + if (IS_GEN6(dev)) ret = snb_resume_prepare(dev_priv, rpm_resume); - } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) ret = hsw_resume_prepare(dev_priv, rpm_resume); - } else if (IS_VALLEYVIEW(dev)) { + else if (IS_VALLEYVIEW(dev)) ret = vlv_resume_prepare(dev_priv, rpm_resume); - } else { - WARN_ON(1); - ret = -ENODEV; - } + else + ret = 0; return ret; } -- GitLab From 68ecfe2fe2e6c636bb7e2cf616e658e342e05362 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 8 Aug 2014 12:07:55 +0200 Subject: [PATCH 0237/1868] video: of: display_timing: delete unneeded test before of_node_put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Of_node_put supports NULL as its argument, so the initial test is not necessary. Furthermore, native_mode cannot be NULL at this point. Suggested by Uwe Kleine-König. 
The semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e; @@ -if (e) of_node_put(e); // Signed-off-by: Julia Lawall Signed-off-by: Tomi Valkeinen --- drivers/video/of_display_timing.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/video/of_display_timing.c b/drivers/video/of_display_timing.c index 987edf110038..339f831c8412 100644 --- a/drivers/video/of_display_timing.c +++ b/drivers/video/of_display_timing.c @@ -233,8 +233,7 @@ struct display_timings *of_get_display_timings(struct device_node *np) return disp; timingfail: - if (native_mode) - of_node_put(native_mode); + of_node_put(native_mode); display_timings_release(disp); entryfail: kfree(disp); -- GitLab From 1287c5bf214b906d64a71c481545010dbe1b5b66 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 23 Aug 2014 13:20:24 +0200 Subject: [PATCH 0238/1868] video: fbdev: matrox: use c99 initializers in structures Use c99 initializers for structures. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @decl@ identifier i1,fld; type T; field list[n] fs; @@ struct i1 { fs T fld; ...}; @bad@ identifier decl.i1,i2; expression e; initializer list[decl.n] is; @@ struct i1 i2 = { is, + .fld = e - e ,...}; // Signed-off-by: Julia Lawall Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/matrox/matroxfb_base.c | 52 ++++++++++++++++++--- drivers/video/fbdev/matrox/matroxfb_maven.c | 20 ++++---- 2 files changed, 56 insertions(+), 16 deletions(-) diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c index 7116c5309c7d..62539ca1cfa9 100644 --- a/drivers/video/fbdev/matrox/matroxfb_base.c +++ b/drivers/video/fbdev/matrox/matroxfb_base.c @@ -1341,19 +1341,57 @@ struct video_board { struct matrox_switch* lowlevel; }; #ifdef CONFIG_FB_MATROX_MILLENIUM -static struct video_board vbMillennium = {0x0800000, 0x0800000, FB_ACCEL_MATROX_MGA2064W, 
&matrox_millennium}; -static struct video_board vbMillennium2 = {0x1000000, 0x0800000, FB_ACCEL_MATROX_MGA2164W, &matrox_millennium}; -static struct video_board vbMillennium2A = {0x1000000, 0x0800000, FB_ACCEL_MATROX_MGA2164W_AGP, &matrox_millennium}; +static struct video_board vbMillennium = { + .maxvram = 0x0800000, + .maxdisplayable = 0x0800000, + .accelID = FB_ACCEL_MATROX_MGA2064W, + .lowlevel = &matrox_millennium +}; + +static struct video_board vbMillennium2 = { + .maxvram = 0x1000000, + .maxdisplayable = 0x0800000, + .accelID = FB_ACCEL_MATROX_MGA2164W, + .lowlevel = &matrox_millennium +}; + +static struct video_board vbMillennium2A = { + .maxvram = 0x1000000, + .maxdisplayable = 0x0800000, + .accelID = FB_ACCEL_MATROX_MGA2164W_AGP, + .lowlevel = &matrox_millennium +}; #endif /* CONFIG_FB_MATROX_MILLENIUM */ #ifdef CONFIG_FB_MATROX_MYSTIQUE -static struct video_board vbMystique = {0x0800000, 0x0800000, FB_ACCEL_MATROX_MGA1064SG, &matrox_mystique}; +static struct video_board vbMystique = { + .maxvram = 0x0800000, + .maxdisplayable = 0x0800000, + .accelID = FB_ACCEL_MATROX_MGA1064SG, + .lowlevel = &matrox_mystique +}; #endif /* CONFIG_FB_MATROX_MYSTIQUE */ #ifdef CONFIG_FB_MATROX_G -static struct video_board vbG100 = {0x0800000, 0x0800000, FB_ACCEL_MATROX_MGAG100, &matrox_G100}; -static struct video_board vbG200 = {0x1000000, 0x1000000, FB_ACCEL_MATROX_MGAG200, &matrox_G100}; +static struct video_board vbG100 = { + .maxvram = 0x0800000, + .maxdisplayable = 0x0800000, + .accelID = FB_ACCEL_MATROX_MGAG100, + .lowlevel = &matrox_G100 +}; + +static struct video_board vbG200 = { + .maxvram = 0x1000000, + .maxdisplayable = 0x1000000, + .accelID = FB_ACCEL_MATROX_MGAG200, + .lowlevel = &matrox_G100 +}; /* from doc it looks like that accelerator can draw only to low 16MB :-( Direct accesses & displaying are OK for whole 32MB */ -static struct video_board vbG400 = {0x2000000, 0x1000000, FB_ACCEL_MATROX_MGAG400, &matrox_G100}; +static struct video_board vbG400 = { + 
.maxvram = 0x2000000, + .maxdisplayable = 0x1000000, + .accelID = FB_ACCEL_MATROX_MGAG400, + .lowlevel = &matrox_G100 +}; #endif #define DEVF_VIDEO64BIT 0x0001 diff --git a/drivers/video/fbdev/matrox/matroxfb_maven.c b/drivers/video/fbdev/matrox/matroxfb_maven.c index ee41a0f276b2..bf5ce04f9aea 100644 --- a/drivers/video/fbdev/matrox/matroxfb_maven.c +++ b/drivers/video/fbdev/matrox/matroxfb_maven.c @@ -201,21 +201,23 @@ struct matrox_pll_ctl { }; static const struct matrox_pll_features2 maven1000_pll = { - 50000000, - 300000000, - 5, 128, - 3, 32, - 3 + .vco_freq_min = 50000000, + .vco_freq_max = 300000000, + .feed_div_min = 5, + .feed_div_max = 128, + .in_div_min = 3, + .in_div_max = 32, + .post_shift_max = 3 }; static const struct matrox_pll_ctl maven_PAL = { - 540000, - 50 + .ref_freq = 540000, + .den = 50 }; static const struct matrox_pll_ctl maven_NTSC = { - 450450, /* 27027000/60 == 27000000/59.94005994 */ - 60 + .ref_freq = 450450, /* 27027000/60 == 27000000/59.94005994 */ + .den = 60 }; static int matroxfb_PLL_mavenclock(const struct matrox_pll_features2* pll, -- GitLab From 39917f08721b2f04d06407777ce7ae5913533674 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 23 Aug 2014 13:20:29 +0200 Subject: [PATCH 0239/1868] OMAPDSS: DSI: use c99 initializers in structures Use c99 initializers for structures. 
A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @decl@ identifier i1,fld; type T; field list[n] fs; @@ struct i1 { fs T fld; ...}; @bad@ identifier decl.i1,i2; expression e; initializer list[decl.n] is; @@ struct i1 i2 = { is, + .fld = e - e ,...}; // Signed-off-by: Julia Lawall Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/omap2/dss/dsi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/omap2/dss/dsi.c b/drivers/video/fbdev/omap2/dss/dsi.c index 56b92444c54f..b6f6ae1d4664 100644 --- a/drivers/video/fbdev/omap2/dss/dsi.c +++ b/drivers/video/fbdev/omap2/dss/dsi.c @@ -2571,7 +2571,10 @@ static int dsi_sync_vc_vp(struct platform_device *dsidev, int channel) { struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev); DECLARE_COMPLETION_ONSTACK(completion); - struct dsi_packet_sent_handler_data vp_data = { dsidev, &completion }; + struct dsi_packet_sent_handler_data vp_data = { + .dsidev = dsidev, + .completion = &completion + }; int r = 0; u8 bit; @@ -2617,7 +2620,10 @@ static void dsi_packet_sent_handler_l4(void *data, u32 mask) static int dsi_sync_vc_l4(struct platform_device *dsidev, int channel) { DECLARE_COMPLETION_ONSTACK(completion); - struct dsi_packet_sent_handler_data l4_data = { dsidev, &completion }; + struct dsi_packet_sent_handler_data l4_data = { + .dsidev = dsidev, + .completion = &completion + }; int r = 0; r = dsi_register_isr_vc(dsidev, channel, dsi_packet_sent_handler_l4, -- GitLab From 084244646217ec83970facaf7baf200c02a8183e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 23 Aug 2014 17:50:28 +0200 Subject: [PATCH 0240/1868] video: fbdev: aty: use c99 initializers in structures Use c99 initializers for structures. 
A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @decl@ identifier i1,fld; type T; field list[n] fs; @@ struct i1 { fs T fld; ...}; @bad@ identifier decl.i1,i2; expression e; initializer list[decl.n] is; @@ struct i1 i2 = { is, + .fld = e - e ,...}; // Signed-off-by: Julia Lawall Reviewed-by: Josh Triplett Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/aty/aty128fb.c | 63 ++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 8 deletions(-) diff --git a/drivers/video/fbdev/aty/aty128fb.c b/drivers/video/fbdev/aty/aty128fb.c index ff6070170d01..aedf2fbf9bf6 100644 --- a/drivers/video/fbdev/aty/aty128fb.c +++ b/drivers/video/fbdev/aty/aty128fb.c @@ -324,14 +324,61 @@ struct aty128_meminfo { }; /* various memory configurations */ -static const struct aty128_meminfo sdr_128 = - { 4, 4, 3, 3, 1, 3, 1, 16, 30, 16, "128-bit SDR SGRAM (1:1)" }; -static const struct aty128_meminfo sdr_64 = - { 4, 8, 3, 3, 1, 3, 1, 17, 46, 17, "64-bit SDR SGRAM (1:1)" }; -static const struct aty128_meminfo sdr_sgram = - { 4, 4, 1, 2, 1, 2, 1, 16, 24, 16, "64-bit SDR SGRAM (2:1)" }; -static const struct aty128_meminfo ddr_sgram = - { 4, 4, 3, 3, 2, 3, 1, 16, 31, 16, "64-bit DDR SGRAM" }; +static const struct aty128_meminfo sdr_128 = { + .ML = 4, + .MB = 4, + .Trcd = 3, + .Trp = 3, + .Twr = 1, + .CL = 3, + .Tr2w = 1, + .LoopLatency = 16, + .DspOn = 30, + .Rloop = 16, + .name = "128-bit SDR SGRAM (1:1)", +}; + +static const struct aty128_meminfo sdr_64 = { + .ML = 4, + .MB = 8, + .Trcd = 3, + .Trp = 3, + .Twr = 1, + .CL = 3, + .Tr2w = 1, + .LoopLatency = 17, + .DspOn = 46, + .Rloop = 17, + .name = "64-bit SDR SGRAM (1:1)", +}; + +static const struct aty128_meminfo sdr_sgram = { + .ML = 4, + .MB = 4, + .Trcd = 1, + .Trp = 2, + .Twr = 1, + .CL = 2, + .Tr2w = 1, + .LoopLatency = 16, + .DspOn = 24, + .Rloop = 16, + .name = "64-bit SDR SGRAM (2:1)", +}; + +static const struct aty128_meminfo ddr_sgram = { + .ML = 4, + 
.MB = 4, + .Trcd = 3, + .Trp = 3, + .Twr = 2, + .CL = 3, + .Tr2w = 1, + .LoopLatency = 16, + .DspOn = 31, + .Rloop = 16, + .name = "64-bit DDR SGRAM", +}; static struct fb_fix_screeninfo aty128fb_fix = { .id = "ATY Rage128", -- GitLab From c76031f3ece10939d87cc96857b9e045133064e6 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 8 Jul 2014 18:27:18 +0200 Subject: [PATCH 0241/1868] video: vermilion: remove unnecessary break after goto Cc: Jean-Christophe Plagniol-Villard Cc: Tomi Valkeinen Cc: Jingoo Han Cc: linux-fbdev@vger.kernel.org Signed-off-by: Fabian Frederick Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/vermilion/vermilion.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c index 048a66640b03..5f930aeccf1f 100644 --- a/drivers/video/fbdev/vermilion/vermilion.c +++ b/drivers/video/fbdev/vermilion/vermilion.c @@ -481,7 +481,6 @@ static int vml_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) default: err = -ENODEV; goto out_err_1; - break; } info = &vinfo->info; -- GitLab From 87306c1d09c8dd7ed291a6b5d6c31323cd81f2ae Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 11 Jul 2014 18:13:27 +0200 Subject: [PATCH 0242/1868] video: mx3fb: Update comment for dmaengine_prep_slave_sg() API Commit 16052827d98fbc13c31ebad560af4bd53e2b4dd5 ("dmaengine/dma_slave: introduce inline wrappers") changed the code to use the new API, but forgot to update a comment. 
Signed-off-by: Geert Uytterhoeven Cc: Jean-Christophe Plagniol-Villard Cc: Tomi Valkeinen Cc: Jiri Kosina Cc: linux-fbdev@vger.kernel.org -- v2: - New Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/mx3fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/mx3fb.c b/drivers/video/fbdev/mx3fb.c index c645a0a0c341..5e97baf92721 100644 --- a/drivers/video/fbdev/mx3fb.c +++ b/drivers/video/fbdev/mx3fb.c @@ -1179,7 +1179,7 @@ static int mx3fb_pan_display(struct fb_var_screeninfo *var, /* * We enable the End of Frame interrupt, which will free a tx-descriptor, - * which we will need for the next device_prep_slave_sg(). The + * which we will need for the next dmaengine_prep_slave_sg(). The * IRQ-handler will disable the IRQ again. */ init_completion(&mx3_fbi->flip_cmpl); -- GitLab From 0c46575f7c7dea4068924f2c0d938232f68a5cda Mon Sep 17 00:00:00 2001 From: Pramod Gurav Date: Tue, 26 Aug 2014 17:40:37 +0530 Subject: [PATCH 0243/1868] msm: msm_fb: Add remove function for platform driver for clean unloading This adds a remove function to platform driver structure so that resources are released when driver is unloaded. 
Signed-off-by: Pramod Gurav CC: Jean-Christophe Plagniol-Villard CC: Tomi Valkeinen CC: Stephen Boyd CC: Jingoo Han CC: Rob Clark Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/msm/msm_fb.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/video/fbdev/msm/msm_fb.c b/drivers/video/fbdev/msm/msm_fb.c index 1374803fbcd9..4640188ffd6e 100644 --- a/drivers/video/fbdev/msm/msm_fb.c +++ b/drivers/video/fbdev/msm/msm_fb.c @@ -589,6 +589,8 @@ static int msmfb_probe(struct platform_device *pdev) msmfb->sleeping = WAKING; + platform_set_drvdata(pdev, msmfb); + return 0; error_register_framebuffer: @@ -598,9 +600,23 @@ static int msmfb_probe(struct platform_device *pdev) return ret; } +static int msmfb_remove(struct platform_device *pdev) +{ + struct msmfb_info *msmfb; + + msmfb = platform_get_drvdata(pdev); + + unregister_framebuffer(msmfb->fb); + iounmap(msmfb->fb->screen_base); + framebuffer_release(msmfb->fb); + + return 0; +} + static struct platform_driver msm_panel_driver = { /* need to write remove */ .probe = msmfb_probe, + .remove = msmfb_remove, .driver = {.name = "msm_panel"}, }; -- GitLab From fbaa19df03f87c2aa6a3a0a14dd7d4c098d45643 Mon Sep 17 00:00:00 2001 From: Pramod Gurav Date: Tue, 26 Aug 2014 18:22:58 +0530 Subject: [PATCH 0244/1868] msm: msm_fb: Move to using managed resources of kzalloc Move to managed version of kzalloc. Also checks return for failure case which was missing.
Signed-off-by: Pramod Gurav CC: Jean-Christophe Plagniol-Villard CC: Tomi Valkeinen CC: Stephen Boyd CC: Jingoo Han CC: Rob Clark Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/msm/msm_fb.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/msm/msm_fb.c b/drivers/video/fbdev/msm/msm_fb.c index 4640188ffd6e..2979d7e72126 100644 --- a/drivers/video/fbdev/msm/msm_fb.c +++ b/drivers/video/fbdev/msm/msm_fb.c @@ -569,8 +569,13 @@ static int msmfb_probe(struct platform_device *pdev) mutex_init(&msmfb->panel_init_lock); init_waitqueue_head(&msmfb->frame_wq); INIT_WORK(&msmfb->resume_work, power_on_panel); - msmfb->black = kzalloc(msmfb->fb->var.bits_per_pixel*msmfb->xres, - GFP_KERNEL); + msmfb->black = devm_kzalloc(&pdev->dev, + msmfb->fb->var.bits_per_pixel*msmfb->xres, + GFP_KERNEL); + if (!msmfb->black) { + ret = -ENOMEM; + goto error_register_framebuffer; + } printk(KERN_INFO "msmfb_probe() installing %d x %d panel\n", msmfb->xres, msmfb->yres); -- GitLab From f57eda296dc327fc3f5d21099cf232bcd004237f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 23 Aug 2014 20:33:22 +0200 Subject: [PATCH 0245/1868] video: fbdev: riva: delete double assignment Delete successive assignments to the same location. 
A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression i; @@ *i = ...; i = ...; // Signed-off-by: Julia Lawall Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/riva/riva_hw.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/fbdev/riva/riva_hw.c b/drivers/video/fbdev/riva/riva_hw.c index 78fdbf5178d7..8bdf37f3013b 100644 --- a/drivers/video/fbdev/riva/riva_hw.c +++ b/drivers/video/fbdev/riva/riva_hw.c @@ -430,7 +430,6 @@ static char nv3_arb(nv3_fifo_info * res_info, nv3_sim_state * state, nv3_arb_in int mmisses, gmisses, vmisses, eburst_size, mburst_size; int refresh_cycle; - refresh_cycle = 0; refresh_cycle = 2*(state->mclk_khz/state->pclk_khz) + 5; mmisses = 2; if (state->mem_aligned) gmisses = 2; -- GitLab From 2079a513b0f03c8872322070944720d2c174b005 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 23 Aug 2014 20:33:23 +0200 Subject: [PATCH 0246/1868] video: fbdev: intelfb: delete double assignment Delete successive assignments to the same location. In the second case, = is converted to |=, which looks appropriate based on the values involved. 
A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression i; @@ *i = ...; i = ...; // Signed-off-by: Julia Lawall Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/intelfb/intelfbhw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/video/fbdev/intelfb/intelfbhw.c b/drivers/video/fbdev/intelfb/intelfbhw.c index fbad61da359f..d31ed4e2c46f 100644 --- a/drivers/video/fbdev/intelfb/intelfbhw.c +++ b/drivers/video/fbdev/intelfb/intelfbhw.c @@ -1191,7 +1191,6 @@ int intelfbhw_mode_to_hw(struct intelfb_info *dinfo, vsync_end = vsync_start + var->vsync_len; vtotal = vsync_end + var->upper_margin; vblank_start = vactive; - vblank_end = vtotal; vblank_end = vsync_end + 1; DBG_MSG("V: act %d, ss %d, se %d, tot %d bs %d, be %d\n", @@ -1859,7 +1858,7 @@ void intelfbhw_cursor_init(struct intelfb_info *dinfo) tmp = INREG(CURSOR_CONTROL); tmp &= ~(CURSOR_FORMAT_MASK | CURSOR_GAMMA_ENABLE | CURSOR_ENABLE | CURSOR_STRIDE_MASK); - tmp = CURSOR_FORMAT_3C; + tmp |= CURSOR_FORMAT_3C; OUTREG(CURSOR_CONTROL, tmp); OUTREG(CURSOR_A_BASEADDR, dinfo->cursor.offset << 12); tmp = (64 << CURSOR_SIZE_H_SHIFT) | -- GitLab From a9a3cac6908a86ada51ab12f7eb39d0313814d23 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 23 Aug 2014 20:33:27 +0200 Subject: [PATCH 0247/1868] video: fbdev: sis: delete double assignment Delete successive assignments to the same location. The second assignment is changed to update a different field, as done in other nearby code. 
A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression i; @@ *i = ...; i = ...; // Signed-off-by: Julia Lawall Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/sis/init301.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/sis/init301.c b/drivers/video/fbdev/sis/init301.c index a89e3cafd5ad..295e0dedaf1f 100644 --- a/drivers/video/fbdev/sis/init301.c +++ b/drivers/video/fbdev/sis/init301.c @@ -1714,7 +1714,7 @@ SiS_GetLCDResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned sh SiS_Pr->PanelVCLKIdx315 = VCLK81_315; /* ? */ } else { SiS_Pr->PanelHT = 1688; SiS_Pr->PanelVT = 802; - SiS_Pr->PanelHRS = 48; SiS_Pr->PanelHRS = 112; + SiS_Pr->PanelHRS = 48; SiS_Pr->PanelHRE = 112; SiS_Pr->PanelVRS = 3; SiS_Pr->PanelVRE = 6; SiS_Pr->PanelVCLKIdx300 = VCLK81_300; SiS_Pr->PanelVCLKIdx315 = VCLK81_315; -- GitLab From 58678a77029bc1f78f80f6f93da3d302d587f951 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 23 Aug 2014 20:33:28 +0200 Subject: [PATCH 0248/1868] video: fbdev: au1200fb: delete double assignment Delete successive assignments to the same location. 
A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression i; @@ *i = ...; i = ...; // Signed-off-by: Julia Lawall Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/au1200fb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c index 40494dbdf519..18600d4e1b3f 100644 --- a/drivers/video/fbdev/au1200fb.c +++ b/drivers/video/fbdev/au1200fb.c @@ -1254,7 +1254,6 @@ static void set_global(u_int cmd, struct au1200_lcd_global_regs_t *pdata) pdata->brightness = 30; } divider = (lcd->pwmdiv & 0x3FFFF) + 1; - hi1 = (lcd->pwmhi >> 16) + 1; hi1 = (((pdata->brightness & 0xFF)+1) * divider >> 8); lcd->pwmhi &= 0xFFFF; lcd->pwmhi |= (hi1 << 16); -- GitLab From 2d0871396995139b37f9ceb153c8b07589148343 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 22 Aug 2014 15:51:03 +0200 Subject: [PATCH 0249/1868] builddeb: put the dbg files into the correct directory Since the conversion of objtree to use relative pathnames (commit 7e1c04779e, "kbuild: Use relative path for $(objtree)"), the debug info files have been ending up in /debian/dbgtmp/ in the regular linux-image package instead of the debug files package. Fix up the paths so that the debug files end up in the -dbg package. This is based on a similar patch by Darrick. Reported-and-tested-by: "Darrick J. 
Wong" Signed-off-by: Michal Marek --- scripts/package/builddeb | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index 35d5a5877d04..7c0e6e46905d 100644 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -152,18 +152,16 @@ if grep -q '^CONFIG_MODULES=y' $KCONFIG_CONFIG ; then rmdir "$tmpdir/lib/modules/$version" fi if [ -n "$BUILD_DEBUG" ] ; then - ( - cd $tmpdir - for module in $(find lib/modules/ -name *.ko); do - mkdir -p $(dirname $dbg_dir/usr/lib/debug/$module) - # only keep debug symbols in the debug file - $OBJCOPY --only-keep-debug $module $dbg_dir/usr/lib/debug/$module - # strip original module from debug symbols - $OBJCOPY --strip-debug $module - # then add a link to those - $OBJCOPY --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $module - done - ) + for module in $(find $tmpdir/lib/modules/ -name *.ko -printf '%P\n'); do + module=lib/modules/$module + mkdir -p $(dirname $dbg_dir/usr/lib/debug/$module) + # only keep debug symbols in the debug file + $OBJCOPY --only-keep-debug $tmpdir/$module $dbg_dir/usr/lib/debug/$module + # strip original module from debug symbols + $OBJCOPY --strip-debug $tmpdir/$module + # then add a link to those + $OBJCOPY --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $tmpdir/$module + done fi fi -- GitLab From bb964a92ce70ac2039115edd019aa5eef8faa6bb Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:24 -0500 Subject: [PATCH 0250/1868] kernel misc: Replace __get_cpu_var uses Replace uses of __get_cpu_var for address calculation with this_cpu_ptr. 
Cc: akpm@linux-foundation.org Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- kernel/printk/printk.c | 4 ++-- kernel/smp.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index e04c455a0e38..960fbfc6cd0a 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2628,7 +2628,7 @@ void wake_up_klogd(void) preempt_disable(); if (waitqueue_active(&log_wait)) { this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); - irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); + irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); } preempt_enable(); } @@ -2644,7 +2644,7 @@ int printk_deferred(const char *fmt, ...) va_end(args); __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); - irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); + irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); preempt_enable(); return r; diff --git a/kernel/smp.c b/kernel/smp.c index aff8aa14f547..af24183fe6bb 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -164,7 +164,7 @@ static int generic_exec_single(int cpu, struct call_single_data *csd, if (!csd) { csd = &csd_stack; if (!wait) - csd = &__get_cpu_var(csd_data); + csd = this_cpu_ptr(&csd_data); } csd_lock(csd); @@ -229,7 +229,7 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) WARN_ON(!irqs_disabled()); - head = &__get_cpu_var(call_single_queue); + head = this_cpu_ptr(&call_single_queue); entry = llist_del_all(head); entry = llist_reverse_order(entry); @@ -419,7 +419,7 @@ void smp_call_function_many(const struct cpumask *mask, return; } - cfd = &__get_cpu_var(cfd_data); + cfd = this_cpu_ptr(&cfd_data); cpumask_and(cfd->cpumask, mask, cpu_online_mask); cpumask_clear_cpu(this_cpu, cfd->cpumask); -- GitLab From 22127e93c587afa01e4f7225d2d1cf1d26ae7dfe Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:25 -0500 Subject: [PATCH 0251/1868] time: Replace __get_cpu_var uses Convert uses of __get_cpu_var for creating a 
address from a percpu offset to this_cpu_ptr. The two cases where get_cpu_var is used to actually access a percpu variable are changed to use this_cpu_read/raw_cpu_read. Reviewed-by: Thomas Gleixner Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- drivers/clocksource/dummy_timer.c | 2 +- kernel/irq_work.c | 12 ++++++------ kernel/sched/clock.c | 2 +- kernel/softirq.c | 4 ++-- kernel/time/hrtimer.c | 6 +++--- kernel/time/tick-broadcast.c | 2 +- kernel/time/tick-common.c | 6 +++--- kernel/time/tick-oneshot.c | 2 +- kernel/time/tick-sched.c | 20 ++++++++++---------- kernel/time/timer.c | 2 +- 10 files changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/clocksource/dummy_timer.c b/drivers/clocksource/dummy_timer.c index ad3572541728..31990600fcff 100644 --- a/drivers/clocksource/dummy_timer.c +++ b/drivers/clocksource/dummy_timer.c @@ -28,7 +28,7 @@ static void dummy_timer_set_mode(enum clock_event_mode mode, static void dummy_timer_setup(void) { int cpu = smp_processor_id(); - struct clock_event_device *evt = __this_cpu_ptr(&dummy_timer_evt); + struct clock_event_device *evt = raw_cpu_ptr(&dummy_timer_evt); evt->name = "dummy_timer"; evt->features = CLOCK_EVT_FEAT_PERIODIC | diff --git a/kernel/irq_work.c b/kernel/irq_work.c index e6bcbe756663..345d19edcdae 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -95,11 +95,11 @@ bool irq_work_queue(struct irq_work *work) /* If the work is "lazy", handle it from next tick if any */ if (work->flags & IRQ_WORK_LAZY) { - if (llist_add(&work->llnode, &__get_cpu_var(lazy_list)) && + if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && tick_nohz_tick_stopped()) arch_irq_work_raise(); } else { - if (llist_add(&work->llnode, &__get_cpu_var(raised_list))) + if (llist_add(&work->llnode, this_cpu_ptr(&raised_list))) arch_irq_work_raise(); } @@ -113,8 +113,8 @@ bool irq_work_needs_cpu(void) { struct llist_head *raised, *lazy; - raised = &__get_cpu_var(raised_list); - lazy = 
&__get_cpu_var(lazy_list); + raised = this_cpu_ptr(&raised_list); + lazy = this_cpu_ptr(&lazy_list); if (llist_empty(raised) && llist_empty(lazy)) return false; @@ -166,8 +166,8 @@ static void irq_work_run_list(struct llist_head *list) */ void irq_work_run(void) { - irq_work_run_list(&__get_cpu_var(raised_list)); - irq_work_run_list(&__get_cpu_var(lazy_list)); + irq_work_run_list(this_cpu_ptr(&raised_list)); + irq_work_run_list(this_cpu_ptr(&lazy_list)); } EXPORT_SYMBOL_GPL(irq_work_run); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 3ef6451e972e..c27e4f8f4879 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -134,7 +134,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data); static inline struct sched_clock_data *this_scd(void) { - return &__get_cpu_var(sched_clock_data); + return this_cpu_ptr(&sched_clock_data); } static inline struct sched_clock_data *cpu_sdc(int cpu) diff --git a/kernel/softirq.c b/kernel/softirq.c index 5918d227730f..2d44b5714fe6 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -485,7 +485,7 @@ static void tasklet_action(struct softirq_action *a) local_irq_disable(); list = __this_cpu_read(tasklet_vec.head); __this_cpu_write(tasklet_vec.head, NULL); - __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head); + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); local_irq_enable(); while (list) { @@ -521,7 +521,7 @@ static void tasklet_hi_action(struct softirq_action *a) local_irq_disable(); list = __this_cpu_read(tasklet_hi_vec.head); __this_cpu_write(tasklet_hi_vec.head, NULL); - __this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head); + __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head)); local_irq_enable(); while (list) { diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 1c2fe7de2842..5f2229ba53d6 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1144,7 +1144,7 @@ static 
void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, memset(timer, 0, sizeof(struct hrtimer)); - cpu_base = &__raw_get_cpu_var(hrtimer_bases); + cpu_base = raw_cpu_ptr(&hrtimer_bases); if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS) clock_id = CLOCK_MONOTONIC; @@ -1187,7 +1187,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) struct hrtimer_cpu_base *cpu_base; int base = hrtimer_clockid_to_base(which_clock); - cpu_base = &__raw_get_cpu_var(hrtimer_bases); + cpu_base = raw_cpu_ptr(&hrtimer_bases); *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution); return 0; @@ -1376,7 +1376,7 @@ static void __hrtimer_peek_ahead_timers(void) if (!hrtimer_hres_active()) return; - td = &__get_cpu_var(tick_cpu_device); + td = this_cpu_ptr(&tick_cpu_device); if (td && td->evtdev) hrtimer_interrupt(td->evtdev); } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 64c5990fd500..066f0ec05e48 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -554,7 +554,7 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc) void tick_check_oneshot_broadcast_this_cpu(void) { if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) { - struct tick_device *td = &__get_cpu_var(tick_cpu_device); + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); /* * We might be in the middle of switching over from diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 0a0608edeb26..decfb5f6edb0 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -224,7 +224,7 @@ static void tick_setup_device(struct tick_device *td, void tick_install_replacement(struct clock_event_device *newdev) { - struct tick_device *td = &__get_cpu_var(tick_cpu_device); + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); int cpu = smp_processor_id(); clockevents_exchange_device(td->evtdev, newdev); @@ -374,14 +374,14 @@ void tick_shutdown(unsigned int *cpup) 
void tick_suspend(void) { - struct tick_device *td = &__get_cpu_var(tick_cpu_device); + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); clockevents_shutdown(td->evtdev); } void tick_resume(void) { - struct tick_device *td = &__get_cpu_var(tick_cpu_device); + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); int broadcast = tick_resume_broadcast(); clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 824109060a33..7ce740e78e1b 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -59,7 +59,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev, */ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) { - struct tick_device *td = &__get_cpu_var(tick_cpu_device); + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *dev = td->evtdev; if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) || diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 99aa6ee3908f..73f90932282b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -205,7 +205,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); */ void __tick_nohz_full_check(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); if (tick_nohz_full_cpu(smp_processor_id())) { if (ts->tick_stopped && !is_idle_task(current)) { @@ -545,7 +545,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; ktime_t last_update, expires, ret = { .tv64 = 0 }; unsigned long rcu_delta_jiffies; - struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; + struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); u64 time_delta; time_delta = timekeeping_max_deferment(); @@ -813,7 +813,7 @@ void tick_nohz_idle_enter(void) local_irq_disable(); 
- ts = &__get_cpu_var(tick_cpu_sched); + ts = this_cpu_ptr(&tick_cpu_sched); ts->inidle = 1; __tick_nohz_idle_enter(ts); @@ -831,7 +831,7 @@ EXPORT_SYMBOL_GPL(tick_nohz_idle_enter); */ void tick_nohz_irq_exit(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); if (ts->inidle) __tick_nohz_idle_enter(ts); @@ -846,7 +846,7 @@ void tick_nohz_irq_exit(void) */ ktime_t tick_nohz_get_sleep_length(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); return ts->sleep_length; } @@ -959,7 +959,7 @@ static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) */ static void tick_nohz_handler(struct clock_event_device *dev) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); struct pt_regs *regs = get_irq_regs(); ktime_t now = ktime_get(); @@ -979,7 +979,7 @@ static void tick_nohz_handler(struct clock_event_device *dev) */ static void tick_nohz_switch_to_nohz(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t next; if (!tick_nohz_enabled) @@ -1115,7 +1115,7 @@ early_param("skew_tick", skew_tick); */ void tick_setup_sched_timer(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now = ktime_get(); /* @@ -1184,7 +1184,7 @@ void tick_clock_notify(void) */ void tick_oneshot_notify(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); set_bit(0, &ts->check_clocks); } @@ -1199,7 +1199,7 @@ void tick_oneshot_notify(void) */ int tick_check_oneshot_change(int allow_nohz) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); if (!test_and_clear_bit(0, &ts->check_clocks)) return 0; diff --git 
a/kernel/time/timer.c b/kernel/time/timer.c index aca5dfe2fa3d..04d8ed8399b0 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -655,7 +655,7 @@ static inline void debug_assert_init(struct timer_list *timer) static void do_init_timer(struct timer_list *timer, unsigned int flags, const char *name, struct lock_class_key *key) { - struct tvec_base *base = __raw_get_cpu_var(tvec_bases); + struct tvec_base *base = raw_cpu_read(tvec_bases); timer->entry.next = NULL; timer->base = (void *)((unsigned long)base | flags); -- GitLab From dc5df73b3afffc8d042dadffc1c959008b2c1163 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:26 -0500 Subject: [PATCH 0252/1868] time: Convert a bunch of &__get_cpu_var introduced in the 3.16 merge period Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- kernel/time/hrtimer.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 5f2229ba53d6..a50600d87fb7 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -558,7 +558,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) static int hrtimer_reprogram(struct hrtimer *timer, struct hrtimer_clock_base *base) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); int res; @@ -629,7 +629,7 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) */ static void retrigger_next_event(void *arg) { - struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); if (!hrtimer_hres_active()) return; @@ -903,7 +903,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) */ debug_deactivate(timer); timer_stats_hrtimer_clear_start_info(timer); - reprogram = base->cpu_base == 
&__get_cpu_var(hrtimer_bases); + reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); /* * We must preserve the CALLBACK state flag here, * otherwise we could move the timer base in @@ -963,7 +963,7 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * on dynticks target. */ wake_up_nohz_cpu(new_base->cpu_base->cpu); - } else if (new_base->cpu_base == &__get_cpu_var(hrtimer_bases) && + } else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases) && hrtimer_reprogram(timer, new_base)) { /* * Only allow reprogramming if the new base is on this CPU. @@ -1103,7 +1103,7 @@ EXPORT_SYMBOL_GPL(hrtimer_get_remaining); */ ktime_t hrtimer_get_next_event(void) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *base = cpu_base->clock_base; ktime_t delta, mindelta = { .tv64 = KTIME_MAX }; unsigned long flags; @@ -1242,7 +1242,7 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) */ void hrtimer_interrupt(struct clock_event_device *dev) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); ktime_t expires_next, now, entry_time, delta; int i, retries = 0; @@ -1440,7 +1440,7 @@ void hrtimer_run_pending(void) void hrtimer_run_queues(void) { struct timerqueue_node *node; - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *base; int index, gettime = 1; @@ -1679,7 +1679,7 @@ static void migrate_hrtimers(int scpu) local_irq_disable(); old_base = &per_cpu(hrtimer_bases, scpu); - new_base = &__get_cpu_var(hrtimer_bases); + new_base = this_cpu_ptr(&hrtimer_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. 
-- GitLab From 4a32fea9d78f2d2315c0072757b197d5a304dc8b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:27 -0500 Subject: [PATCH 0253/1868] scheduler: Replace __get_cpu_var with this_cpu_ptr Convert all uses of __get_cpu_var for address calculation to use this_cpu_ptr instead. [Uses of __get_cpu_var with cpumask_var_t are no longer handled by this patch] Cc: Peter Zijlstra Acked-by: Ingo Molnar Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/kernel_stat.h | 4 ++-- kernel/events/callchain.c | 4 ++-- kernel/events/core.c | 24 ++++++++++++------------ kernel/sched/sched.h | 4 ++-- kernel/taskstats.c | 2 +- kernel/time/tick-sched.c | 4 ++-- kernel/user-return-notifier.c | 4 ++-- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index ecbc52f9ff77..8422b4ed6882 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -44,8 +44,8 @@ DECLARE_PER_CPU(struct kernel_stat, kstat); DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); /* Must have preemption disabled for this to be meaningful. 
*/ -#define kstat_this_cpu (&__get_cpu_var(kstat)) -#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat)) +#define kstat_this_cpu this_cpu_ptr(&kstat) +#define kcpustat_this_cpu this_cpu_ptr(&kernel_cpustat) #define kstat_cpu(cpu) per_cpu(kstat, cpu) #define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 97b67df8fbfe..c4f63e68a35c 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -137,7 +137,7 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx) int cpu; struct callchain_cpus_entries *entries; - *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); + *rctx = get_recursion_context(this_cpu_ptr(callchain_recursion)); if (*rctx == -1) return NULL; @@ -153,7 +153,7 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx) static void put_callchain_entry(int rctx) { - put_recursion_context(__get_cpu_var(callchain_recursion), rctx); + put_recursion_context(this_cpu_ptr(callchain_recursion), rctx); } struct perf_callchain_entry * diff --git a/kernel/events/core.c b/kernel/events/core.c index 1cf24b3e42ec..4d44e40a0483 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -239,7 +239,7 @@ static void perf_duration_warn(struct irq_work *w) u64 avg_local_sample_len; u64 local_samples_len; - local_samples_len = __get_cpu_var(running_sample_length); + local_samples_len = __this_cpu_read(running_sample_length); avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES; printk_ratelimited(KERN_WARNING @@ -261,10 +261,10 @@ void perf_sample_event_took(u64 sample_len_ns) return; /* decay the counter by 1 average sample */ - local_samples_len = __get_cpu_var(running_sample_length); + local_samples_len = __this_cpu_read(running_sample_length); local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES; local_samples_len += sample_len_ns; - __get_cpu_var(running_sample_length) = local_samples_len; + 
__this_cpu_write(running_sample_length, local_samples_len); /* * note: this will be biased artifically low until we have @@ -877,7 +877,7 @@ static DEFINE_PER_CPU(struct list_head, rotation_list); static void perf_pmu_rotate_start(struct pmu *pmu) { struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - struct list_head *head = &__get_cpu_var(rotation_list); + struct list_head *head = this_cpu_ptr(&rotation_list); WARN_ON(!irqs_disabled()); @@ -2389,7 +2389,7 @@ void __perf_event_task_sched_out(struct task_struct *task, * to check if we have to switch out PMU state. * cgroup event are system-wide mode only */ - if (atomic_read(&__get_cpu_var(perf_cgroup_events))) + if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) perf_cgroup_sched_out(task, next); } @@ -2632,11 +2632,11 @@ void __perf_event_task_sched_in(struct task_struct *prev, * to check if we have to switch in PMU state. * cgroup event are system-wide mode only */ - if (atomic_read(&__get_cpu_var(perf_cgroup_events))) + if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) perf_cgroup_sched_in(prev, task); /* check for system-wide branch_stack events */ - if (atomic_read(&__get_cpu_var(perf_branch_stack_events))) + if (atomic_read(this_cpu_ptr(&perf_branch_stack_events))) perf_branch_stack_sched_in(prev, task); } @@ -2891,7 +2891,7 @@ bool perf_event_can_stop_tick(void) void perf_event_task_tick(void) { - struct list_head *head = &__get_cpu_var(rotation_list); + struct list_head *head = this_cpu_ptr(&rotation_list); struct perf_cpu_context *cpuctx, *tmp; struct perf_event_context *ctx; int throttled; @@ -5671,7 +5671,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, struct perf_sample_data *data, struct pt_regs *regs) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); struct perf_event *event; struct hlist_head *head; @@ -5690,7 +5690,7 @@ static void do_perf_sw_event(enum perf_type_id type, 
u32 event_id, int perf_swevent_get_recursion_context(void) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); return get_recursion_context(swhash->recursion); } @@ -5698,7 +5698,7 @@ EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); inline void perf_swevent_put_recursion_context(int rctx) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); put_recursion_context(swhash->recursion, rctx); } @@ -5727,7 +5727,7 @@ static void perf_swevent_read(struct perf_event *event) static int perf_swevent_add(struct perf_event *event, int flags) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); struct hw_perf_event *hwc = &event->hw; struct hlist_head *head; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 579712f4e9d5..77d92f8130e8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -650,10 +650,10 @@ static inline int cpu_of(struct rq *rq) DECLARE_PER_CPU(struct rq, runqueues); #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -#define this_rq() (&__get_cpu_var(runqueues)) +#define this_rq() this_cpu_ptr(&runqueues) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) -#define raw_rq() (&__raw_get_cpu_var(runqueues)) +#define raw_rq() raw_cpu_ptr(&runqueues) static inline u64 rq_clock(struct rq *rq) { diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 13d2f7cd65db..b312fcc73024 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -638,7 +638,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) fill_tgid_exit(tsk); } - listeners = __this_cpu_ptr(&listener_array); + listeners = raw_cpu_ptr(&listener_array); if (list_empty(&listeners->list)) return; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 73f90932282b..3cadc112519f 100644 --- 
a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -924,7 +924,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts) */ void tick_nohz_idle_exit(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now; local_irq_disable(); @@ -1041,7 +1041,7 @@ static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now) static inline void tick_nohz_irq_enter(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now; if (!ts->idle_active && !ts->tick_stopped) diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index 394f70b17162..9586b670a5b2 100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c @@ -14,7 +14,7 @@ static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); void user_return_notifier_register(struct user_return_notifier *urn) { set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); - hlist_add_head(&urn->link, &__get_cpu_var(return_notifier_list)); + hlist_add_head(&urn->link, this_cpu_ptr(&return_notifier_list)); } EXPORT_SYMBOL_GPL(user_return_notifier_register); @@ -25,7 +25,7 @@ EXPORT_SYMBOL_GPL(user_return_notifier_register); void user_return_notifier_unregister(struct user_return_notifier *urn) { hlist_del(&urn->link); - if (hlist_empty(&__get_cpu_var(return_notifier_list))) + if (hlist_empty(this_cpu_ptr(&return_notifier_list))) clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); } EXPORT_SYMBOL_GPL(user_return_notifier_unregister); -- GitLab From a0b6bc63a20a91faef0127cc61cca6d06ee737df Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:28 -0500 Subject: [PATCH 0254/1868] block: Replace __this_cpu_ptr with raw_cpu_ptr __this_cpu_ptr is being phased out; use raw_cpu_ptr, which was introduced in 3.15-rc1, instead.
Cc: Jens Axboe Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 956027711faf..33ccdcf51a9e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4129,7 +4129,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) * per cpu locality group is to reduce the contention between block * request from multiple CPUs. */ - ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups); + ac->ac_lg = raw_cpu_ptr(sbi->s_locality_groups); /* we're going to use group allocation */ ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; -- GitLab From 1b2a1a7e8ad1144dc3f676f2651cb84e01548d59 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:29 -0500 Subject: [PATCH 0255/1868] drivers/char/random: Replace __get_cpu_var uses A single case of using __get_cpu_var for address calculation. Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index c18d41db83d8..82759cef9043 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -874,7 +874,7 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) void add_interrupt_randomness(int irq, int irq_flags) { struct entropy_store *r; - struct fast_pool *fast_pool = &__get_cpu_var(irq_randomness); + struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; cycles_t cycles = random_get_entropy(); -- GitLab From 229b6863b2cf9514f08e468fea586bc195ebcf50 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:30 -0500 Subject: [PATCH 0256/1868] drivers/cpuidle: Replace __get_cpu_var uses for address calculation All of these are for address calculation. Replace with this_cpu_ptr(). 
Cc: Daniel Lezcano Cc: linux-pm@vger.kernel.org Acked-by: Rafael J. Wysocki [cpufreq changes] Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- drivers/cpuidle/governors/ladder.c | 4 ++-- drivers/cpuidle/governors/menu.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 044ee0df5871..06b57c4c4d80 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -66,7 +66,7 @@ static inline void ladder_do_selection(struct ladder_device *ldev, static int ladder_select_state(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - struct ladder_device *ldev = &__get_cpu_var(ladder_devices); + struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); struct ladder_device_state *last_state; int last_residency, last_idx = ldev->last_state_idx; int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); @@ -170,7 +170,7 @@ static int ladder_enable_device(struct cpuidle_driver *drv, */ static void ladder_reflect(struct cpuidle_device *dev, int index) { - struct ladder_device *ldev = &__get_cpu_var(ladder_devices); + struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); if (index > 0) ldev->last_state_idx = index; } diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 34db2fb3ef1e..710a233b9b0d 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -289,7 +289,7 @@ static void get_typical_interval(struct menu_device *data) */ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - struct menu_device *data = &__get_cpu_var(menu_devices); + struct menu_device *data = this_cpu_ptr(&menu_devices); int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); int i; unsigned int interactivity_req; @@ -372,7 +372,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) */ static void menu_reflect(struct cpuidle_device 
*dev, int index) { - struct menu_device *data = &__get_cpu_var(menu_devices); + struct menu_device *data = this_cpu_ptr(&menu_devices); data->last_state_idx = index; if (index >= 0) data->needs_update = 1; @@ -385,7 +385,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index) */ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - struct menu_device *data = &__get_cpu_var(menu_devices); + struct menu_device *data = this_cpu_ptr(&menu_devices); int last_idx = data->last_state_idx; struct cpuidle_state *target = &drv->states[last_idx]; unsigned int measured_us; -- GitLab From 879d92745a1a5a6573dee83cfa2953413fed23fc Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:31 -0500 Subject: [PATCH 0257/1868] drivers/oprofile: Replace __get_cpu_var uses for address calculation Replace the uses of __get_cpu_var for address calculation with this_cpu_ptr. Cc: Robert Richter Cc: oprofile-list@lists.sf.net Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- drivers/oprofile/cpu_buffer.c | 10 +++++----- drivers/oprofile/timer_int.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 8aa73fac6ad4..0581461c3a67 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -45,7 +45,7 @@ unsigned long oprofile_get_cpu_buffer_size(void) void oprofile_cpu_buffer_inc_smpl_lost(void) { - struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); + struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer); cpu_buf->sample_lost_overflow++; } @@ -297,7 +297,7 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, unsigned long event, int is_kernel, struct task_struct *task) { - struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); + struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer); unsigned long backtrace = oprofile_backtrace_depth; /* 
@@ -357,7 +357,7 @@ oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs, { struct op_sample *sample; int is_kernel = !user_mode(regs); - struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); + struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer); cpu_buf->sample_received++; @@ -412,13 +412,13 @@ int oprofile_write_commit(struct op_entry *entry) void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) { - struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); + struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer); log_sample(cpu_buf, pc, 0, is_kernel, event, NULL); } void oprofile_add_trace(unsigned long pc) { - struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); + struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer); if (!cpu_buf->tracing) return; diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index 61be1d9c16c8..bdef916e5dda 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -32,7 +32,7 @@ static enum hrtimer_restart oprofile_hrtimer_notify(struct hrtimer *hrtimer) static void __oprofile_hrtimer_start(void *unused) { - struct hrtimer *hrtimer = &__get_cpu_var(oprofile_hrtimer); + struct hrtimer *hrtimer = this_cpu_ptr(&oprofile_hrtimer); if (!ctr_running) return; -- GitLab From 27d051677977da11dc50caeb210204b587c94eb5 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:32 -0500 Subject: [PATCH 0258/1868] drivers/clocksource: Replace __get_cpu_var used for address calculation Replace __get_cpu_var used for address calculation with this_cpu_ptr. 
Acked-by: James Hogan Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- drivers/clocksource/metag_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/metag_generic.c b/drivers/clocksource/metag_generic.c index 9e4db41abe3c..b7384b853e5a 100644 --- a/drivers/clocksource/metag_generic.c +++ b/drivers/clocksource/metag_generic.c @@ -90,7 +90,7 @@ static struct clocksource clocksource_metag = { static irqreturn_t metag_timer_interrupt(int irq, void *dummy) { - struct clock_event_device *evt = &__get_cpu_var(local_clockevent); + struct clock_event_device *evt = this_cpu_ptr(&local_clockevent); evt->event_handler(evt); -- GitLab From 70b2776a5cb16c39ff5451c4eee72691734eabfc Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:33 -0500 Subject: [PATCH 0259/1868] drivers/net/ethernet/tile: Replace __get_cpu_var uses for address calculation Replace with this_cpu_ptr. Acked-by: Chris Metcalf Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- drivers/net/ethernet/tile/tilegx.c | 18 +++++++++--------- drivers/net/ethernet/tile/tilepro.c | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c index 69557a26f749..e354742c3f59 100644 --- a/drivers/net/ethernet/tile/tilegx.c +++ b/drivers/net/ethernet/tile/tilegx.c @@ -423,7 +423,7 @@ static void tile_net_pop_all_buffers(int instance, int stack) /* Provide linux buffers to mPIPE. */ static void tile_net_provide_needed_buffers(void) { - struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_info *info = this_cpu_ptr(&per_cpu_info); int instance, kind; for (instance = 0; instance < NR_MPIPE_MAX && info->mpipe[instance].has_iqueue; instance++) { @@ -585,7 +585,7 @@ static void tile_net_receive_skb(struct net_device *dev, struct sk_buff *skb, /* Handle a packet. Return true if "processed", false if "filtered". 
*/ static bool tile_net_handle_packet(int instance, gxio_mpipe_idesc_t *idesc) { - struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_info *info = this_cpu_ptr(&per_cpu_info); struct mpipe_data *md = &mpipe_data[instance]; struct net_device *dev = md->tile_net_devs_for_channel[idesc->channel]; uint8_t l2_offset; @@ -651,7 +651,7 @@ static bool tile_net_handle_packet(int instance, gxio_mpipe_idesc_t *idesc) */ static int tile_net_poll(struct napi_struct *napi, int budget) { - struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_info *info = this_cpu_ptr(&per_cpu_info); unsigned int work = 0; gxio_mpipe_idesc_t *idesc; int instance, i, n; @@ -700,7 +700,7 @@ static int tile_net_poll(struct napi_struct *napi, int budget) /* Handle an ingress interrupt from an instance on the current cpu. */ static irqreturn_t tile_net_handle_ingress_irq(int irq, void *id) { - struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_info *info = this_cpu_ptr(&per_cpu_info); napi_schedule(&info->mpipe[(uint64_t)id].napi); return IRQ_HANDLED; } @@ -763,7 +763,7 @@ static enum hrtimer_restart tile_net_handle_tx_wake_timer(struct hrtimer *t) /* Make sure the egress timer is scheduled. */ static void tile_net_schedule_egress_timer(void) { - struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_info *info = this_cpu_ptr(&per_cpu_info); if (!info->egress_timer_scheduled) { hrtimer_start(&info->egress_timer, @@ -780,7 +780,7 @@ static void tile_net_schedule_egress_timer(void) */ static enum hrtimer_restart tile_net_handle_egress_timer(struct hrtimer *t) { - struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_info *info = this_cpu_ptr(&per_cpu_info); unsigned long irqflags; bool pending = false; int i, instance; @@ -1996,7 +1996,7 @@ static unsigned int tile_net_tx_frags(struct frag *frags, /* Help the kernel transmit a packet. 
*/ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) { - struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_info *info = this_cpu_ptr(&per_cpu_info); struct tile_net_priv *priv = netdev_priv(dev); int instance = priv->instance; struct mpipe_data *md = &mpipe_data[instance]; @@ -2138,7 +2138,7 @@ static int tile_net_set_mac_address(struct net_device *dev, void *p) static void tile_net_netpoll(struct net_device *dev) { int instance = mpipe_instance(dev); - struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_info *info = this_cpu_ptr(&per_cpu_info); struct mpipe_data *md = &mpipe_data[instance]; disable_percpu_irq(md->ingress_irq); @@ -2237,7 +2237,7 @@ static void tile_net_dev_init(const char *name, const uint8_t *mac) /* Per-cpu module initialization. */ static void tile_net_init_module_percpu(void *unused) { - struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_info *info = this_cpu_ptr(&per_cpu_info); int my_cpu = smp_processor_id(); int instance; diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index 88c712126692..3faf03fce827 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -996,13 +996,13 @@ static void tile_net_register(void *dev_ptr) PDEBUG("tile_net_register(queue_id %d)\n", queue_id); if (!strcmp(dev->name, "xgbe0")) - info = &__get_cpu_var(hv_xgbe0); + info = this_cpu_ptr(&hv_xgbe0); else if (!strcmp(dev->name, "xgbe1")) - info = &__get_cpu_var(hv_xgbe1); + info = this_cpu_ptr(&hv_xgbe1); else if (!strcmp(dev->name, "gbe0")) - info = &__get_cpu_var(hv_gbe0); + info = this_cpu_ptr(&hv_gbe0); else if (!strcmp(dev->name, "gbe1")) - info = &__get_cpu_var(hv_gbe1); + info = this_cpu_ptr(&hv_gbe1); else BUG(); -- GitLab From f7f66b05aa2ac2632c5441a3f129f3be827fe7e7 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:34 -0500 Subject: [PATCH 0260/1868] watchdog: 
Replace __raw_get_cpu_var uses Most of these are the uses of &__raw_get_cpu_var for address calculation. touch_softlockup_watchdog_sync() uses __raw_get_cpu_var to write to per cpu variables. Use __this_cpu_write instead. Cc: Wim Van Sebroeck Cc: linux-watchdog@vger.kernel.org Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- kernel/watchdog.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index a8d6914030fe..dca8cae7e55d 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -185,7 +185,7 @@ void touch_nmi_watchdog(void) * case we shouldn't have to worry about the watchdog * going off. */ - __raw_get_cpu_var(watchdog_nmi_touch) = true; + raw_cpu_write(watchdog_nmi_touch, true); touch_softlockup_watchdog(); } EXPORT_SYMBOL(touch_nmi_watchdog); @@ -194,8 +194,8 @@ EXPORT_SYMBOL(touch_nmi_watchdog); void touch_softlockup_watchdog_sync(void) { - __raw_get_cpu_var(softlockup_touch_sync) = true; - __raw_get_cpu_var(watchdog_touch_ts) = 0; + __this_cpu_write(softlockup_touch_sync, true); + __this_cpu_write(watchdog_touch_ts, 0); } #ifdef CONFIG_HARDLOCKUP_DETECTOR @@ -387,7 +387,7 @@ static void watchdog_set_prio(unsigned int policy, unsigned int prio) static void watchdog_enable(unsigned int cpu) { - struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); + struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); /* kick off the timer for the hardlockup detector */ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -407,7 +407,7 @@ static void watchdog_enable(unsigned int cpu) static void watchdog_disable(unsigned int cpu) { - struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); + struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); watchdog_set_prio(SCHED_NORMAL, 0); hrtimer_cancel(hrtimer); @@ -534,7 +534,7 @@ static struct smp_hotplug_thread watchdog_threads = { static void restart_watchdog_hrtimer(void *info) { - struct hrtimer *hrtimer = 
&__raw_get_cpu_var(watchdog_hrtimer); + struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); int ret; /* -- GitLab From 903ceff7ca7b4d80c083a80ee5163b74e9fa359f Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:35 -0500 Subject: [PATCH 0261/1868] net: Replace get_cpu_var through this_cpu_ptr Replace uses of get_cpu_var for address calculation through this_cpu_ptr. Cc: netdev@vger.kernel.org Cc: Eric Dumazet Acked-by: David S. Miller Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/net/netfilter/nf_conntrack.h | 2 +- include/net/snmp.h | 6 +++--- net/core/dev.c | 14 +++++++------- net/core/drop_monitor.c | 2 +- net/core/skbuff.c | 2 +- net/ipv4/route.c | 4 ++-- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_output.c | 2 +- net/ipv6/syncookies.c | 2 +- net/rds/ib_rdma.c | 2 +- 11 files changed, 20 insertions(+), 20 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 37252f71a380..c8a7db605e03 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -242,7 +242,7 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked); static inline struct nf_conn *nf_ct_untracked_get(void) { - return &__raw_get_cpu_var(nf_conntrack_untracked); + return raw_cpu_ptr(&nf_conntrack_untracked); } void nf_ct_untracked_status_or(unsigned long bits); diff --git a/include/net/snmp.h b/include/net/snmp.h index f1f27fdbb0d5..e154133877a2 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -168,7 +168,7 @@ struct linux_xfrm_mib { #define SNMP_ADD_STATS64_BH(mib, field, addend) \ do { \ - __typeof__(*mib) *ptr = __this_cpu_ptr(mib); \ + __typeof__(*mib) *ptr = raw_cpu_ptr(mib); \ u64_stats_update_begin(&ptr->syncp); \ ptr->mibs[field] += addend; \ u64_stats_update_end(&ptr->syncp); \ @@ -189,8 +189,8 @@ struct linux_xfrm_mib { #define SNMP_INC_STATS64(mib, field) 
SNMP_ADD_STATS64(mib, field, 1) #define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend) \ do { \ - __typeof__(*mib) *ptr; \ - ptr = __this_cpu_ptr(mib); \ + __typeof__(*mib) *ptr; \ + ptr = raw_cpu_ptr((mib)); \ u64_stats_update_begin(&ptr->syncp); \ ptr->mibs[basefield##PKTS]++; \ ptr->mibs[basefield##OCTETS] += addend; \ diff --git a/net/core/dev.c b/net/core/dev.c index b65a5051361f..9ef13ff354fe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2153,7 +2153,7 @@ static inline void __netif_reschedule(struct Qdisc *q) unsigned long flags; local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); + sd = this_cpu_ptr(&softnet_data); q->next_sched = NULL; *sd->output_queue_tailp = q; sd->output_queue_tailp = &q->next_sched; @@ -3195,7 +3195,7 @@ static void rps_trigger_softirq(void *data) static int rps_ipi_queued(struct softnet_data *sd) { #ifdef CONFIG_RPS - struct softnet_data *mysd = &__get_cpu_var(softnet_data); + struct softnet_data *mysd = this_cpu_ptr(&softnet_data); if (sd != mysd) { sd->rps_ipi_next = mysd->rps_ipi_list; @@ -3222,7 +3222,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) if (qlen < (netdev_max_backlog >> 1)) return false; - sd = &__get_cpu_var(softnet_data); + sd = this_cpu_ptr(&softnet_data); rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); @@ -3369,7 +3369,7 @@ EXPORT_SYMBOL(netif_rx_ni); static void net_tx_action(struct softirq_action *h) { - struct softnet_data *sd = &__get_cpu_var(softnet_data); + struct softnet_data *sd = this_cpu_ptr(&softnet_data); if (sd->completion_queue) { struct sk_buff *clist; @@ -3794,7 +3794,7 @@ EXPORT_SYMBOL(netif_receive_skb); static void flush_backlog(void *arg) { struct net_device *dev = arg; - struct softnet_data *sd = &__get_cpu_var(softnet_data); + struct softnet_data *sd = this_cpu_ptr(&softnet_data); struct sk_buff *skb, *tmp; rps_lock(sd); @@ -4301,7 +4301,7 @@ void __napi_schedule(struct napi_struct *n) unsigned long flags; local_irq_save(flags); - 
____napi_schedule(&__get_cpu_var(softnet_data), n); + ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); } EXPORT_SYMBOL(__napi_schedule); @@ -4422,7 +4422,7 @@ EXPORT_SYMBOL(netif_napi_del); static void net_rx_action(struct softirq_action *h) { - struct softnet_data *sd = &__get_cpu_var(softnet_data); + struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 50f9a9db5792..252e155c837b 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -146,7 +146,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) unsigned long flags; local_irq_save(flags); - data = &__get_cpu_var(dm_cpu_data); + data = this_cpu_ptr(&dm_cpu_data); spin_lock(&data->lock); dskb = data->skb; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 163b673f9e62..adfc7ee1acf2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -345,7 +345,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) unsigned long flags; local_irq_save(flags); - nc = &__get_cpu_var(netdev_alloc_cache); + nc = this_cpu_ptr(&netdev_alloc_cache); if (unlikely(!nc->frag.page)) { refill: for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index eaa4b000c7b4..7d6f4e021846 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1311,7 +1311,7 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) if (rt_is_input_route(rt)) { p = (struct rtable **)&nh->nh_rth_input; } else { - p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output); + p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output); } orig = *p; @@ -1939,7 +1939,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, do_cache = false; goto add; } - prth = __this_cpu_ptr(nh->nh_pcpu_rth_output); + prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); } rth = 
rcu_dereference(*prth); if (rt_cache_valid(rth)) { diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c0c75688896e..f83391bfdd76 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -40,7 +40,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, net_get_random_once(syncookie_secret, sizeof(syncookie_secret)); - tmp = __get_cpu_var(ipv4_cookie_scratch); + tmp = this_cpu_ptr(ipv4_cookie_scratch); memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c])); tmp[0] = (__force u32)saddr; tmp[1] = (__force u32)daddr; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 541f26a67ba2..b2cab7770a11 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3058,7 +3058,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) local_bh_disable(); p = ACCESS_ONCE(tcp_md5sig_pool); if (p) - return __this_cpu_ptr(p); + return raw_cpu_ptr(p); local_bh_enable(); return NULL; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5a7c41fbc6d3..d145f7ef78f3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -842,7 +842,7 @@ void tcp_wfree(struct sk_buff *skb) /* queue this socket to tasklet queue */ local_irq_save(flags); - tsq = &__get_cpu_var(tsq_tasklet); + tsq = this_cpu_ptr(&tsq_tasklet); list_add(&tp->tsq_node, &tsq->head); tasklet_schedule(&tsq->tasklet); local_irq_restore(flags); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 83cea1d39466..637de5b87589 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -67,7 +67,7 @@ static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *dadd net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret)); - tmp = __get_cpu_var(ipv6_cookie_scratch); + tmp = this_cpu_ptr(ipv6_cookie_scratch); /* * we have 320 bits of information to hash, copy in the remaining diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index e8fdb172adbb..273b8bff6ba4 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -267,7 
+267,7 @@ static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) unsigned long *flag; preempt_disable(); - flag = &__get_cpu_var(clean_list_grace); + flag = this_cpu_ptr(&clean_list_grace); set_bit(CLEAN_LIST_BUSY_BIT, flag); ret = llist_del_first(&pool->clean_list); if (ret) -- GitLab From 1f125e76f5c134b0a904ef30e96ee8da6a49f4b4 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:36 -0500 Subject: [PATCH 0262/1868] md: Replace __this_cpu_ptr with raw_cpu_ptr __this_cpu_ptr is being phased out. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- drivers/md/dm-stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c index 28a90122a5a8..87f86c77b094 100644 --- a/drivers/md/dm-stats.c +++ b/drivers/md/dm-stats.c @@ -548,7 +548,7 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, * A race condition can at worst result in the merged flag being * misrepresented, so we don't have to disable preemption here. */ - last = __this_cpu_ptr(stats->last); + last = raw_cpu_ptr(stats->last); stats_aux->merged = (bi_sector == (ACCESS_ONCE(last->last_sector) && ((bi_rw & (REQ_WRITE | REQ_DISCARD)) == -- GitLab From bd83e65bfaedafef1ba21ce19c1ea7913da01bec Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:37 -0500 Subject: [PATCH 0263/1868] metag: Replace __get_cpu_var uses for address calculation Replace __get_cpu_var uses for address calculation with this_cpu_ptr(). 
Acked-by: James Hogan Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/metag/kernel/perf/perf_event.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index 02c08737f6aa..2478ec6d23c9 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -258,7 +258,7 @@ int metag_pmu_event_set_period(struct perf_event *event, static void metag_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -306,7 +306,7 @@ static void metag_pmu_stop(struct perf_event *event, int flags) static int metag_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = 0, ret = 0; @@ -348,7 +348,7 @@ static int metag_pmu_add(struct perf_event *event, int flags) static void metag_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -597,7 +597,7 @@ static int _hw_perf_event_init(struct perf_event *event) static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx) { - struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events); unsigned int config = event->config; unsigned int tmp = config & 0xf0; unsigned long flags; @@ -670,7 +670,7 @@ static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx) static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx) { - struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events); 
+ struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events); unsigned int tmp = 0; unsigned long flags; @@ -718,7 +718,7 @@ static u64 metag_pmu_read_counter(int idx) static void metag_pmu_write_counter(int idx, u32 val) { - struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events); u32 tmp = 0; unsigned long flags; @@ -751,7 +751,7 @@ static int metag_pmu_event_map(int idx) static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev) { int idx = (int)dev; - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event = cpuhw->events[idx]; struct hw_perf_event *hwc = &event->hw; struct pt_regs *regs = get_irq_regs(); -- GitLab From eee8492db674a5c89daa83158d8bf2839efc8850 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:38 -0500 Subject: [PATCH 0264/1868] drivers/net/ethernet/tile: __get_cpu_var call introduced in 3.14 Another case was merged for 3.14-rc1 Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- drivers/net/ethernet/tile/tilegx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c index e354742c3f59..049747f558c9 100644 --- a/drivers/net/ethernet/tile/tilegx.c +++ b/drivers/net/ethernet/tile/tilegx.c @@ -551,7 +551,7 @@ static inline bool filter_packet(struct net_device *dev, void *buf) static void tile_net_receive_skb(struct net_device *dev, struct sk_buff *skb, gxio_mpipe_idesc_t *idesc, unsigned long len) { - struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_info *info = this_cpu_ptr(&per_cpu_info); struct tile_net_priv *priv = netdev_priv(dev); int instance = priv->instance; @@ -1927,7 +1927,7 @@ static void tso_egress(struct net_device *dev, gxio_mpipe_equeue_t *equeue, */ static int tile_net_tx_tso(struct sk_buff *skb, struct net_device 
*dev) { - struct tile_net_info *info = &__get_cpu_var(per_cpu_info); + struct tile_net_info *info = this_cpu_ptr(&per_cpu_info); struct tile_net_priv *priv = netdev_priv(dev); int channel = priv->echannel; int instance = priv->instance; -- GitLab From 532d0d0690d1532dcc5a190162ad820b636bcd4d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:39 -0500 Subject: [PATCH 0265/1868] irqchips: Replace __this_cpu_ptr uses [ARM specific] These are generally replaced with raw_cpu_ptr. However, in gic_get_percpu_base() we immediately dereference the pointer. This is equivalent to a raw_cpu_read. So use that operation there. Cc: nicolas.pitre@linaro.org Cc: Russell King Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- drivers/irqchip/irq-gic.c | 10 +++++----- kernel/irq/chip.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 4b959e606fe8..399a707ec51e 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -102,7 +102,7 @@ static struct gic_chip_data gic_data[MAX_GIC_NR] __read_mostly; #ifdef CONFIG_GIC_NON_BANKED static void __iomem *gic_get_percpu_base(union gic_base *base) { - return *__this_cpu_ptr(base->percpu_base); + return raw_cpu_read(base->percpu_base); } static void __iomem *gic_get_common_base(union gic_base *base) @@ -504,11 +504,11 @@ static void gic_cpu_save(unsigned int gic_nr) if (!dist_base || !cpu_base) return; - ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_enable); + ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_enable); for (i = 0; i < DIV_ROUND_UP(32, 32); i++) ptr[i] = readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4); - ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_conf); + ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_conf); for (i = 0; i < DIV_ROUND_UP(32, 16); i++) ptr[i] = readl_relaxed(dist_base + GIC_DIST_CONFIG + i * 4); @@ -530,11 +530,11 @@ static void gic_cpu_restore(unsigned int gic_nr) if 
(!dist_base || !cpu_base) return; - ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_enable); + ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_enable); for (i = 0; i < DIV_ROUND_UP(32, 32); i++) writel_relaxed(ptr[i], dist_base + GIC_DIST_ENABLE_SET + i * 4); - ptr = __this_cpu_ptr(gic_data[gic_nr].saved_ppi_conf); + ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_conf); for (i = 0; i < DIV_ROUND_UP(32, 16); i++) writel_relaxed(ptr[i], dist_base + GIC_DIST_CONFIG + i * 4); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index a2b28a2fd7b1..cca7292fc61e 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -669,7 +669,7 @@ void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irqaction *action = desc->action; - void *dev_id = __this_cpu_ptr(action->percpu_dev_id); + void *dev_id = raw_cpu_ptr(action->percpu_dev_id); irqreturn_t res; kstat_incr_irqs_this_cpu(irq, desc); -- GitLab From 89cbc76768c2fa4ed95545bf961f3a14ddfeed21 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:40 -0500 Subject: [PATCH 0266/1868] x86: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. 
this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and fewer registers are used when code is generated. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processor's instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y); Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++; Converts to __this_cpu_inc(y); Cc: Thomas Gleixner Cc: x86@kernel.org Acked-by: H. 
Peter Anvin Acked-by: Ingo Molnar Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/debugreg.h | 4 +- arch/x86/include/asm/uv/uv_hub.h | 2 +- arch/x86/kernel/apb_timer.c | 4 +- arch/x86/kernel/apic/apic.c | 4 +- arch/x86/kernel/cpu/common.c | 6 +-- arch/x86/kernel/cpu/mcheck/mce-inject.c | 6 +-- arch/x86/kernel/cpu/mcheck/mce.c | 46 ++++++++++----------- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_intel.c | 22 +++++----- arch/x86/kernel/cpu/perf_event.c | 22 +++++----- arch/x86/kernel/cpu/perf_event_amd.c | 4 +- arch/x86/kernel/cpu/perf_event_intel.c | 18 ++++---- arch/x86/kernel/cpu/perf_event_intel_ds.c | 20 ++++----- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 12 +++--- arch/x86/kernel/cpu/perf_event_intel_rapl.c | 12 +++--- arch/x86/kernel/cpu/perf_event_knc.c | 2 +- arch/x86/kernel/cpu/perf_event_p4.c | 6 +-- arch/x86/kernel/hw_breakpoint.c | 8 ++-- arch/x86/kernel/irq_64.c | 6 +-- arch/x86/kernel/kvm.c | 22 +++++----- arch/x86/kvm/svm.c | 6 +-- arch/x86/kvm/vmx.c | 10 ++--- arch/x86/kvm/x86.c | 2 +- arch/x86/mm/kmemcheck/kmemcheck.c | 14 +++---- arch/x86/oprofile/nmi_int.c | 8 ++-- arch/x86/platform/uv/uv_time.c | 2 +- arch/x86/xen/enlighten.c | 4 +- arch/x86/xen/multicalls.c | 8 ++-- arch/x86/xen/spinlock.c | 2 +- arch/x86/xen/time.c | 10 ++--- 30 files changed, 147 insertions(+), 147 deletions(-) diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 4b528a970bd4..61fd18b83b6c 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -97,11 +97,11 @@ extern void hw_breakpoint_restore(void); DECLARE_PER_CPU(int, debug_stack_usage); static inline void debug_stack_usage_inc(void) { - __get_cpu_var(debug_stack_usage)++; + __this_cpu_inc(debug_stack_usage); } static inline void debug_stack_usage_dec(void) { - __get_cpu_var(debug_stack_usage)--; + __this_cpu_dec(debug_stack_usage); } int is_debug_stack(unsigned long addr); void 
debug_stack_set_zero(void); diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index c63e925fd6b7..bb84cfd5a1a1 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -164,7 +164,7 @@ struct uv_hub_info_s { }; DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) +#define uv_hub_info this_cpu_ptr(&__uv_hub_info) #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) /* diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index af5b08ab3b71..5972b108f15a 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -146,7 +146,7 @@ static inline int is_apbt_capable(void) static int __init apbt_clockevent_register(void) { struct sfi_timer_table_entry *mtmr; - struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev); + struct apbt_dev *adev = this_cpu_ptr(&cpu_apbt_dev); mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); if (mtmr == NULL) { @@ -200,7 +200,7 @@ void apbt_setup_secondary_clock(void) if (!cpu) return; - adev = &__get_cpu_var(cpu_apbt_dev); + adev = this_cpu_ptr(&cpu_apbt_dev); if (!adev->timer) { adev->timer = dw_apb_clockevent_init(cpu, adev->name, APBT_CLOCKEVENT_RATING, adev_virt_addr(adev), diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 67760275544b..00853b254ab0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -561,7 +561,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); */ static void setup_APIC_timer(void) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events); + struct clock_event_device *levt = this_cpu_ptr(&lapic_events); if (this_cpu_has(X86_FEATURE_ARAT)) { lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; @@ -696,7 +696,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) static int __init calibrate_APIC_clock(void) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events); + struct 
clock_event_device *levt = this_cpu_ptr(&lapic_events); void (*real_handler)(struct clock_event_device *dev); unsigned long deltaj; long delta, deltatsc; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e4ab2b42bd6f..5666eb9568fc 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1198,9 +1198,9 @@ DEFINE_PER_CPU(int, debug_stack_usage); int is_debug_stack(unsigned long addr) { - return __get_cpu_var(debug_stack_usage) || - (addr <= __get_cpu_var(debug_stack_addr) && - addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); + return __this_cpu_read(debug_stack_usage) || + (addr <= __this_cpu_read(debug_stack_addr) && + addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ)); } NOKPROBE_SYMBOL(is_debug_stack); diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 5ac2d1fb28bc..4cfba4371a71 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -83,7 +83,7 @@ static DEFINE_MUTEX(mce_inject_mutex); static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) { int cpu = smp_processor_id(); - struct mce *m = &__get_cpu_var(injectm); + struct mce *m = this_cpu_ptr(&injectm); if (!cpumask_test_cpu(cpu, mce_inject_cpumask)) return NMI_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); @@ -97,7 +97,7 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) static void mce_irq_ipi(void *info) { int cpu = smp_processor_id(); - struct mce *m = &__get_cpu_var(injectm); + struct mce *m = this_cpu_ptr(&injectm); if (cpumask_test_cpu(cpu, mce_inject_cpumask) && m->inject_flags & MCJ_EXCEPTION) { @@ -109,7 +109,7 @@ static void mce_irq_ipi(void *info) /* Inject mce on current CPU */ static int raise_local(void) { - struct mce *m = &__get_cpu_var(injectm); + struct mce *m = this_cpu_ptr(&injectm); int context = MCJ_CTX(m->inject_flags); int ret = 0; int cpu = m->extcpu; diff --git 
a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index bd9ccda8087f..61a9668cebfd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -400,7 +400,7 @@ static u64 mce_rdmsrl(u32 msr) if (offset < 0) return 0; - return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); + return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset); } if (rdmsrl_safe(msr, &v)) { @@ -422,7 +422,7 @@ static void mce_wrmsrl(u32 msr, u64 v) int offset = msr_to_offset(msr); if (offset >= 0) - *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; + *(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v; return; } wrmsrl(msr, v); @@ -478,7 +478,7 @@ static DEFINE_PER_CPU(struct mce_ring, mce_ring); /* Runs with CPU affinity in workqueue */ static int mce_ring_empty(void) { - struct mce_ring *r = &__get_cpu_var(mce_ring); + struct mce_ring *r = this_cpu_ptr(&mce_ring); return r->start == r->end; } @@ -490,7 +490,7 @@ static int mce_ring_get(unsigned long *pfn) *pfn = 0; get_cpu(); - r = &__get_cpu_var(mce_ring); + r = this_cpu_ptr(&mce_ring); if (r->start == r->end) goto out; *pfn = r->ring[r->start]; @@ -504,7 +504,7 @@ static int mce_ring_get(unsigned long *pfn) /* Always runs in MCE context with preempt off */ static int mce_ring_add(unsigned long pfn) { - struct mce_ring *r = &__get_cpu_var(mce_ring); + struct mce_ring *r = this_cpu_ptr(&mce_ring); unsigned next; next = (r->end + 1) % MCE_RING_SIZE; @@ -526,7 +526,7 @@ int mce_available(struct cpuinfo_x86 *c) static void mce_schedule_work(void) { if (!mce_ring_empty()) - schedule_work(&__get_cpu_var(mce_work)); + schedule_work(this_cpu_ptr(&mce_work)); } DEFINE_PER_CPU(struct irq_work, mce_irq_work); @@ -551,7 +551,7 @@ static void mce_report_event(struct pt_regs *regs) return; } - irq_work_queue(&__get_cpu_var(mce_irq_work)); + irq_work_queue(this_cpu_ptr(&mce_irq_work)); } /* @@ -1045,7 +1045,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_gather_info(&m, 
regs); - final = &__get_cpu_var(mces_seen); + final = this_cpu_ptr(&mces_seen); *final = m; memset(valid_banks, 0, sizeof(valid_banks)); @@ -1278,22 +1278,22 @@ static unsigned long (*mce_adjust_timer)(unsigned long interval) = static int cmc_error_seen(void) { - unsigned long *v = &__get_cpu_var(mce_polled_error); + unsigned long *v = this_cpu_ptr(&mce_polled_error); return test_and_clear_bit(0, v); } static void mce_timer_fn(unsigned long data) { - struct timer_list *t = &__get_cpu_var(mce_timer); + struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned long iv; int notify; WARN_ON(smp_processor_id() != data); - if (mce_available(__this_cpu_ptr(&cpu_info))) { + if (mce_available(this_cpu_ptr(&cpu_info))) { machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); + this_cpu_ptr(&mce_poll_banks)); mce_intel_cmci_poll(); } @@ -1323,7 +1323,7 @@ static void mce_timer_fn(unsigned long data) */ void mce_timer_kick(unsigned long interval) { - struct timer_list *t = &__get_cpu_var(mce_timer); + struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned long when = jiffies + interval; unsigned long iv = __this_cpu_read(mce_next_interval); @@ -1659,7 +1659,7 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) static void __mcheck_cpu_init_timer(void) { - struct timer_list *t = &__get_cpu_var(mce_timer); + struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned int cpu = smp_processor_id(); setup_timer(t, mce_timer_fn, cpu); @@ -1702,8 +1702,8 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_timer(); - INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); - init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb); + INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work); + init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb); } /* @@ -1955,7 +1955,7 @@ static struct miscdevice mce_chrdev_device = { static void __mce_disable_bank(void *arg) { int bank = 
*((int *)arg); - __clear_bit(bank, __get_cpu_var(mce_poll_banks)); + __clear_bit(bank, this_cpu_ptr(mce_poll_banks)); cmci_disable_bank(bank); } @@ -2065,7 +2065,7 @@ static void mce_syscore_shutdown(void) static void mce_syscore_resume(void) { __mcheck_cpu_init_generic(); - __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); + __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info)); } static struct syscore_ops mce_syscore_ops = { @@ -2080,7 +2080,7 @@ static struct syscore_ops mce_syscore_ops = { static void mce_cpu_restart(void *data) { - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; __mcheck_cpu_init_generic(); __mcheck_cpu_init_timer(); @@ -2096,14 +2096,14 @@ static void mce_restart(void) /* Toggle features for corrected errors */ static void mce_disable_cmci(void *data) { - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; cmci_clear(); } static void mce_enable_ce(void *all) { - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; cmci_reenable(); cmci_recheck(); @@ -2336,7 +2336,7 @@ static void mce_disable_cpu(void *h) unsigned long action = *(unsigned long *)h; int i; - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; if (!(action & CPU_TASKS_FROZEN)) @@ -2354,7 +2354,7 @@ static void mce_reenable_cpu(void *h) unsigned long action = *(unsigned long *)h; int i; - if (!mce_available(__this_cpu_ptr(&cpu_info))) + if (!mce_available(raw_cpu_ptr(&cpu_info))) return; if (!(action & CPU_TASKS_FROZEN)) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 1e49f8f41276..5d4999f95aec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -310,7 +310,7 @@ static void amd_threshold_interrupt(void) * event. 
*/ machine_check_poll(MCP_TIMESTAMP, - &__get_cpu_var(mce_poll_banks)); + this_cpu_ptr(&mce_poll_banks)); if (high & MASK_OVERFLOW_HI) { rdmsrl(address, m.misc); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 3bdb95ae8c43..b3c97bafc123 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -86,7 +86,7 @@ void mce_intel_cmci_poll(void) { if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) return; - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); + machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); } void mce_intel_hcpu_update(unsigned long cpu) @@ -145,7 +145,7 @@ static void cmci_storm_disable_banks(void) u64 val; raw_spin_lock_irqsave(&cmci_discover_lock, flags); - owned = __get_cpu_var(mce_banks_owned); + owned = this_cpu_ptr(mce_banks_owned); for_each_set_bit(bank, owned, MAX_NR_BANKS) { rdmsrl(MSR_IA32_MCx_CTL2(bank), val); val &= ~MCI_CTL2_CMCI_EN; @@ -195,7 +195,7 @@ static void intel_threshold_interrupt(void) { if (cmci_storm_detect()) return; - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); + machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); mce_notify_irq(); } @@ -206,7 +206,7 @@ static void intel_threshold_interrupt(void) */ static void cmci_discover(int banks) { - unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); + unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned); unsigned long flags; int i; int bios_wrong_thresh = 0; @@ -228,7 +228,7 @@ static void cmci_discover(int banks) /* Already owned by someone else? */ if (val & MCI_CTL2_CMCI_EN) { clear_bit(i, owned); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); + __clear_bit(i, this_cpu_ptr(mce_poll_banks)); continue; } @@ -252,7 +252,7 @@ static void cmci_discover(int banks) /* Did the enable bit stick? 
-- the bank supports CMCI */ if (val & MCI_CTL2_CMCI_EN) { set_bit(i, owned); - __clear_bit(i, __get_cpu_var(mce_poll_banks)); + __clear_bit(i, this_cpu_ptr(mce_poll_banks)); /* * We are able to set thresholds for some banks that * had a threshold of 0. This means the BIOS has not @@ -263,7 +263,7 @@ static void cmci_discover(int banks) (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) bios_wrong_thresh = 1; } else { - WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); + WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks))); } } raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); @@ -284,10 +284,10 @@ void cmci_recheck(void) unsigned long flags; int banks; - if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) + if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks)) return; local_irq_save(flags); - machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); + machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); local_irq_restore(flags); } @@ -296,12 +296,12 @@ static void __cmci_disable_bank(int bank) { u64 val; - if (!test_bit(bank, __get_cpu_var(mce_banks_owned))) + if (!test_bit(bank, this_cpu_ptr(mce_banks_owned))) return; rdmsrl(MSR_IA32_MCx_CTL2(bank), val); val &= ~MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(bank), val); - __clear_bit(bank, __get_cpu_var(mce_banks_owned)); + __clear_bit(bank, this_cpu_ptr(mce_banks_owned)); } /* diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2879ecdaac43..5cd2b7967370 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -487,7 +487,7 @@ static int __x86_pmu_event_init(struct perf_event *event) void x86_pmu_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -505,7 +505,7 @@ void x86_pmu_disable_all(void) static void x86_pmu_disable(struct pmu *pmu) { 
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu_initialized()) return; @@ -522,7 +522,7 @@ static void x86_pmu_disable(struct pmu *pmu) void x86_pmu_enable_all(int added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -869,7 +869,7 @@ static void x86_pmu_start(struct perf_event *event, int flags); static void x86_pmu_enable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; struct hw_perf_event *hwc; int i, added = cpuc->n_added; @@ -1020,7 +1020,7 @@ void x86_pmu_enable_event(struct perf_event *event) */ static int x86_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc; int assign[X86_PMC_IDX_MAX]; int n, n0, ret; @@ -1071,7 +1071,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) static void x86_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx = event->hw.idx; if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) @@ -1150,7 +1150,7 @@ void perf_event_print_debug(void) void x86_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { @@ -1172,7 +1172,7 @@ void x86_pmu_stop(struct perf_event *event, int flags) static void x86_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = 
&__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; /* @@ -1227,7 +1227,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) int idx, handled = 0; u64 val; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); /* * Some chipsets need to unmask the LVTPC in a particular spot @@ -1636,7 +1636,7 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) */ static int x86_pmu_commit_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int assign[X86_PMC_IDX_MAX]; int n, ret; @@ -1995,7 +1995,7 @@ static unsigned long get_segment_base(unsigned int segment) if (idx > GDT_ENTRIES) return 0; - desc = __this_cpu_ptr(&gdt_page.gdt[0]); + desc = raw_cpu_ptr(gdt_page.gdt); } return get_desc_base(desc + idx); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index beeb7cc07044..28926311aac1 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -699,7 +699,7 @@ __init int amd_pmu_init(void) void amd_pmu_enable_virt(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); cpuc->perf_ctr_virt_mask = 0; @@ -711,7 +711,7 @@ EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); void amd_pmu_disable_virt(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); /* * We only mask out the Host-only bit so that host-only counting works diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 2502d0d9d246..6f80accf137d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1045,7 +1045,7 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) static void intel_pmu_disable_all(void) { - struct cpu_hw_events *cpuc = 
&__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); @@ -1058,7 +1058,7 @@ static void intel_pmu_disable_all(void) static void intel_pmu_enable_all(int added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); intel_pmu_pebs_enable_all(); intel_pmu_lbr_enable_all(); @@ -1092,7 +1092,7 @@ static void intel_pmu_enable_all(int added) */ static void intel_pmu_nhm_workaround(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); static const unsigned long nhm_magic[4] = { 0x4300B5, 0x4300D2, @@ -1191,7 +1191,7 @@ static inline bool event_is_checkpointed(struct perf_event *event) static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { intel_pmu_disable_bts(); @@ -1255,7 +1255,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) static void intel_pmu_enable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { if (!__this_cpu_read(cpu_hw_events.enabled)) @@ -1349,7 +1349,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) u64 status; int handled; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); /* * No known reason to not always do late ACK, @@ -1781,7 +1781,7 @@ EXPORT_SYMBOL_GPL(perf_guest_get_msrs); static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = 
this_cpu_ptr(&cpu_hw_events); struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; @@ -1802,7 +1802,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; int idx; @@ -1836,7 +1836,7 @@ static void core_pmu_enable_event(struct perf_event *event) static void core_pmu_enable_all(int added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 696ade311ded..7b786b369789 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -475,7 +475,7 @@ void intel_pmu_enable_bts(u64 config) void intel_pmu_disable_bts(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); unsigned long debugctlmsr; if (!cpuc->ds) @@ -492,7 +492,7 @@ void intel_pmu_disable_bts(void) int intel_pmu_drain_bts_buffer(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; struct bts_record { u64 from; @@ -712,7 +712,7 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event) void intel_pmu_pebs_enable(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; @@ -727,7 +727,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) void 
intel_pmu_pebs_disable(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; cpuc->pebs_enabled &= ~(1ULL << hwc->idx); @@ -745,7 +745,7 @@ void intel_pmu_pebs_disable(struct perf_event *event) void intel_pmu_pebs_enable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->pebs_enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); @@ -753,7 +753,7 @@ void intel_pmu_pebs_enable_all(void) void intel_pmu_pebs_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->pebs_enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, 0); @@ -761,7 +761,7 @@ void intel_pmu_pebs_disable_all(void) static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); unsigned long from = cpuc->lbr_entries[0].from; unsigned long old_to, to = cpuc->lbr_entries[0].to; unsigned long ip = regs->ip; @@ -868,7 +868,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * We cast to the biggest pebs_record but are careful not to * unconditionally access the 'extra' entries. 
*/ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct pebs_record_hsw *pebs = __pebs; struct perf_sample_data data; struct pt_regs regs; @@ -957,7 +957,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; struct perf_event *event = cpuc->events[0]; /* PMC0 only */ struct pebs_record_core *at, *top; @@ -998,7 +998,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; struct perf_event *event = NULL; void *at, *top; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 9dd2459a4c73..ebb0d3144551 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -133,7 +133,7 @@ static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); static void __intel_pmu_lbr_enable(void) { u64 debugctl; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->lbr_sel) wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config); @@ -183,7 +183,7 @@ void intel_pmu_lbr_reset(void) void intel_pmu_lbr_enable(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu.lbr_nr) return; @@ -203,7 +203,7 @@ void intel_pmu_lbr_enable(struct perf_event *event) void intel_pmu_lbr_disable(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events 
*cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu.lbr_nr) return; @@ -220,7 +220,7 @@ void intel_pmu_lbr_disable(struct perf_event *event) void intel_pmu_lbr_enable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->lbr_users) __intel_pmu_lbr_enable(); @@ -228,7 +228,7 @@ void intel_pmu_lbr_enable_all(void) void intel_pmu_lbr_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->lbr_users) __intel_pmu_lbr_disable(); @@ -332,7 +332,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) void intel_pmu_lbr_read(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!cpuc->lbr_users) return; diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 619f7699487a..d64f275fe274 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -135,7 +135,7 @@ static inline u64 rapl_scale(u64 v) * or use ldexp(count, -32). 
* Watts = Joules/Time delta */ - return v << (32 - __get_cpu_var(rapl_pmu)->hw_unit); + return v << (32 - __this_cpu_read(rapl_pmu->hw_unit)); } static u64 rapl_event_update(struct perf_event *event) @@ -187,7 +187,7 @@ static void rapl_stop_hrtimer(struct rapl_pmu *pmu) static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) { - struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); struct perf_event *event; unsigned long flags; @@ -234,7 +234,7 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, static void rapl_pmu_event_start(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); unsigned long flags; spin_lock_irqsave(&pmu->lock, flags); @@ -244,7 +244,7 @@ static void rapl_pmu_event_start(struct perf_event *event, int mode) static void rapl_pmu_event_stop(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); struct hw_perf_event *hwc = &event->hw; unsigned long flags; @@ -278,7 +278,7 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) static int rapl_pmu_event_add(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); + struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); struct hw_perf_event *hwc = &event->hw; unsigned long flags; @@ -696,7 +696,7 @@ static int __init rapl_pmu_init(void) return -1; } - pmu = __get_cpu_var(rapl_pmu); + pmu = __this_cpu_read(rapl_pmu); pr_info("RAPL PMU detected, hw unit 2^-%d Joules," " API unit is 2^-32 Joules," diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c index 838fa8772c62..5b0c232d1ee6 100644 --- a/arch/x86/kernel/cpu/perf_event_knc.c +++ b/arch/x86/kernel/cpu/perf_event_knc.c @@ -217,7 +217,7 @@ static int knc_pmu_handle_irq(struct pt_regs *regs) int bit, loops; u64 status; - cpuc = 
&__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); knc_pmu_disable_all(); diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 5d466b7d8609..f2e56783af3d 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -915,7 +915,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) static void p4_pmu_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -984,7 +984,7 @@ static void p4_pmu_enable_event(struct perf_event *event) static void p4_pmu_enable_all(int added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { @@ -1004,7 +1004,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) int idx, handled = 0; u64 val; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); for (idx = 0; idx < x86_pmu.num_counters; idx++) { int overflow; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 5f9cf20cdb68..3d5fb509bdeb 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -108,7 +108,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) int i; for (i = 0; i < HBP_NUM; i++) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); if (!*slot) { *slot = bp; @@ -122,7 +122,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) set_debugreg(info->address, i); __this_cpu_write(cpu_debugreg[i], info->address); - dr7 = &__get_cpu_var(cpu_dr7); + dr7 = this_cpu_ptr(&cpu_dr7); *dr7 |= encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); @@ -146,7 +146,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) int i; for (i = 
0; i < HBP_NUM; i++) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]); if (*slot == bp) { *slot = NULL; @@ -157,7 +157,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) return; - dr7 = &__get_cpu_var(cpu_dr7); + dr7 = this_cpu_ptr(&cpu_dr7); *dr7 &= ~__encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 4d1c746892eb..e4b503d5558c 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -52,13 +52,13 @@ static inline void stack_overflow_check(struct pt_regs *regs) regs->sp <= curbase + THREAD_SIZE) return; - irq_stack_top = (u64)__get_cpu_var(irq_stack_union.irq_stack) + + irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) + STACK_TOP_MARGIN; - irq_stack_bottom = (u64)__get_cpu_var(irq_stack_ptr); + irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr); if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) return; - oist = &__get_cpu_var(orig_ist); + oist = this_cpu_ptr(&orig_ist); estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN; estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; if (regs->sp >= estack_top && regs->sp <= estack_bottom) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 3dd8e2c4d74a..2b68102dbbeb 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -243,9 +243,9 @@ u32 kvm_read_and_reset_pf_reason(void) { u32 reason = 0; - if (__get_cpu_var(apf_reason).enabled) { - reason = __get_cpu_var(apf_reason).reason; - __get_cpu_var(apf_reason).reason = 0; + if (__this_cpu_read(apf_reason.enabled)) { + reason = __this_cpu_read(apf_reason.reason); + __this_cpu_write(apf_reason.reason, 0); } return reason; @@ -318,7 +318,7 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val) * there's no need for lock or memory barriers. 
* An optimization barrier is implied in apic write. */ - if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi))) + if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi))) return; apic_write(APIC_EOI, APIC_EOI_ACK); } @@ -329,13 +329,13 @@ void kvm_guest_cpu_init(void) return; if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { - u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason)); + u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); #ifdef CONFIG_PREEMPT pa |= KVM_ASYNC_PF_SEND_ALWAYS; #endif wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED); - __get_cpu_var(apf_reason).enabled = 1; + __this_cpu_write(apf_reason.enabled, 1); printk(KERN_INFO"KVM setup async PF for cpu %d\n", smp_processor_id()); } @@ -344,8 +344,8 @@ void kvm_guest_cpu_init(void) unsigned long pa; /* Size alignment is implied but just to make it explicit. */ BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); - __get_cpu_var(kvm_apic_eoi) = 0; - pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi)) + __this_cpu_write(kvm_apic_eoi, 0); + pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi)) | KVM_MSR_ENABLED; wrmsrl(MSR_KVM_PV_EOI_EN, pa); } @@ -356,11 +356,11 @@ void kvm_guest_cpu_init(void) static void kvm_pv_disable_apf(void) { - if (!__get_cpu_var(apf_reason).enabled) + if (!__this_cpu_read(apf_reason.enabled)) return; wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); - __get_cpu_var(apf_reason).enabled = 0; + __this_cpu_write(apf_reason.enabled, 0); printk(KERN_INFO"Unregister pv shared memory for cpu %d\n", smp_processor_id()); @@ -716,7 +716,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) if (in_nmi()) return; - w = &__get_cpu_var(klock_waiting); + w = this_cpu_ptr(&klock_waiting); cpu = smp_processor_id(); start = spin_time_start(); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ddf742768ecf..1b0e90658d8d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -670,7 +670,7 @@ static int svm_hardware_enable(void 
*garbage) if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); - __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT; + __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT); } @@ -1312,8 +1312,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && - svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) { - __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio; + svm->tsc_ratio != __this_cpu_read(current_tsc_ratio)) { + __this_cpu_write(current_tsc_ratio, svm->tsc_ratio); wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); } } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bfe11cf124a1..36cf28a910b8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1601,7 +1601,7 @@ static void reload_tss(void) /* * VT restores TR but not its size. Useless. */ - struct desc_ptr *gdt = &__get_cpu_var(host_gdt); + struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); struct desc_struct *descs; descs = (void *)gdt->address; @@ -1647,7 +1647,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) static unsigned long segment_base(u16 selector) { - struct desc_ptr *gdt = &__get_cpu_var(host_gdt); + struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); struct desc_struct *d; unsigned long table_base; unsigned long v; @@ -1777,7 +1777,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) */ if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) stts(); - load_gdt(&__get_cpu_var(host_gdt)); + load_gdt(this_cpu_ptr(&host_gdt)); } static void vmx_load_host_state(struct vcpu_vmx *vmx) @@ -1807,7 +1807,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } if (vmx->loaded_vmcs->cpu != cpu) { - struct desc_ptr *gdt = &__get_cpu_var(host_gdt); + struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); unsigned long sysenter_esp; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); @@ -2744,7 +2744,7 @@ static int 
hardware_enable(void *garbage) ept_sync_global(); } - native_store_gdt(&__get_cpu_var(host_gdt)); + native_store_gdt(this_cpu_ptr(&host_gdt)); return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f1e22d3b286..c84ee536f9a3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1556,7 +1556,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); - this_tsc_khz = __get_cpu_var(cpu_tsc_khz); + this_tsc_khz = __this_cpu_read(cpu_tsc_khz); if (unlikely(this_tsc_khz == 0)) { local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index dd89a13f1051..b4f2e7e9e907 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -140,7 +140,7 @@ static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); bool kmemcheck_active(struct pt_regs *regs) { - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); return data->balance > 0; } @@ -148,7 +148,7 @@ bool kmemcheck_active(struct pt_regs *regs) /* Save an address that needs to be shown/hidden */ static void kmemcheck_save_addr(unsigned long addr) { - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); data->addr[data->n_addrs++] = addr; @@ -156,7 +156,7 @@ static void kmemcheck_save_addr(unsigned long addr) static unsigned int kmemcheck_show_all(void) { - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); unsigned int i; unsigned int n; @@ -169,7 +169,7 @@ static unsigned int kmemcheck_show_all(void) static unsigned int kmemcheck_hide_all(void) { - struct kmemcheck_context *data = 
&__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); unsigned int i; unsigned int n; @@ -185,7 +185,7 @@ static unsigned int kmemcheck_hide_all(void) */ void kmemcheck_show(struct pt_regs *regs) { - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); BUG_ON(!irqs_disabled()); @@ -226,7 +226,7 @@ void kmemcheck_show(struct pt_regs *regs) */ void kmemcheck_hide(struct pt_regs *regs) { - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); int n; BUG_ON(!irqs_disabled()); @@ -528,7 +528,7 @@ static void kmemcheck_access(struct pt_regs *regs, const uint8_t *insn_primary; unsigned int size; - struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); + struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context); /* Recursive fault -- ouch. */ if (data->busy) { diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 379e8bd0deea..1d2e6392f5fa 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -64,11 +64,11 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs) { if (ctr_running) - model->check_ctrs(regs, &__get_cpu_var(cpu_msrs)); + model->check_ctrs(regs, this_cpu_ptr(&cpu_msrs)); else if (!nmi_enabled) return NMI_DONE; else - model->stop(&__get_cpu_var(cpu_msrs)); + model->stop(this_cpu_ptr(&cpu_msrs)); return NMI_HANDLED; } @@ -91,7 +91,7 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) static void nmi_cpu_start(void *dummy) { - struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); + struct op_msrs const *msrs = this_cpu_ptr(&cpu_msrs); if (!msrs->controls) WARN_ON_ONCE(1); else @@ -111,7 +111,7 @@ static int nmi_start(void) static void nmi_cpu_stop(void *dummy) { - struct op_msrs const *msrs = 
&__get_cpu_var(cpu_msrs); + struct op_msrs const *msrs = this_cpu_ptr(&cpu_msrs); if (!msrs->controls) WARN_ON_ONCE(1); else diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 5c86786bbfd2..a244237f3cfa 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -365,7 +365,7 @@ __setup("uvrtcevt", uv_enable_evt_rtc); static __init void uv_rtc_register_clockevents(struct work_struct *dummy) { - struct clock_event_device *ced = &__get_cpu_var(cpu_ced); + struct clock_event_device *ced = this_cpu_ptr(&cpu_ced); *ced = clock_event_device_uv; ced->cpumask = cpumask_of(smp_processor_id()); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c0cb11fb5008..2628ee556756 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -821,7 +821,7 @@ static void xen_convert_trap_info(const struct desc_ptr *desc, void xen_copy_trap_info(struct trap_info *traps) { - const struct desc_ptr *desc = &__get_cpu_var(idt_desc); + const struct desc_ptr *desc = this_cpu_ptr(&idt_desc); xen_convert_trap_info(desc, traps); } @@ -838,7 +838,7 @@ static void xen_load_idt(const struct desc_ptr *desc) spin_lock(&lock); - __get_cpu_var(idt_desc) = *desc; + memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc)); xen_convert_trap_info(desc, traps); diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 0d82003e76ad..ea54a08d8301 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -54,7 +54,7 @@ DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); void xen_mc_flush(void) { - struct mc_buffer *b = &__get_cpu_var(mc_buffer); + struct mc_buffer *b = this_cpu_ptr(&mc_buffer); struct multicall_entry *mc; int ret = 0; unsigned long flags; @@ -131,7 +131,7 @@ void xen_mc_flush(void) struct multicall_space __xen_mc_entry(size_t args) { - struct mc_buffer *b = &__get_cpu_var(mc_buffer); + struct mc_buffer *b = this_cpu_ptr(&mc_buffer); struct multicall_space ret; unsigned argidx = 
roundup(b->argidx, sizeof(u64)); @@ -162,7 +162,7 @@ struct multicall_space __xen_mc_entry(size_t args) struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) { - struct mc_buffer *b = &__get_cpu_var(mc_buffer); + struct mc_buffer *b = this_cpu_ptr(&mc_buffer); struct multicall_space ret = { NULL, NULL }; BUG_ON(preemptible()); @@ -192,7 +192,7 @@ struct multicall_space xen_mc_extend_args(unsigned long op, size_t size) void xen_mc_callback(void (*fn)(void *), void *data) { - struct mc_buffer *b = &__get_cpu_var(mc_buffer); + struct mc_buffer *b = this_cpu_ptr(&mc_buffer); struct callback *cb; if (b->cbidx == MC_BATCH) { diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 0ba5f3b967f0..23b45eb9a89c 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -109,7 +109,7 @@ static bool xen_pvspin = true; __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) { int irq = __this_cpu_read(lock_kicker_irq); - struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting); + struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting); int cpu = smp_processor_id(); u64 start; unsigned long flags; diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 5718b0b58b60..a1d430b112b3 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -80,7 +80,7 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res) BUG_ON(preemptible()); - state = &__get_cpu_var(xen_runstate); + state = this_cpu_ptr(&xen_runstate); /* * The runstate info is always updated by the hypervisor on @@ -123,7 +123,7 @@ static void do_stolen_accounting(void) WARN_ON(state.state != RUNSTATE_running); - snap = &__get_cpu_var(xen_runstate_snapshot); + snap = this_cpu_ptr(&xen_runstate_snapshot); /* work out how much time the VCPU has not been runn*ing* */ runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; @@ -158,7 +158,7 @@ cycle_t xen_clocksource_read(void) cycle_t ret; preempt_disable_notrace(); - src = 
&__get_cpu_var(xen_vcpu)->time; + src = this_cpu_ptr(&xen_vcpu->time); ret = pvclock_clocksource_read(src); preempt_enable_notrace(); return ret; @@ -397,7 +397,7 @@ static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt. static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) { - struct clock_event_device *evt = &__get_cpu_var(xen_clock_events).evt; + struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt); irqreturn_t ret; ret = IRQ_NONE; @@ -460,7 +460,7 @@ void xen_setup_cpu_clockevents(void) { BUG_ON(preemptible()); - clockevents_register_device(&__get_cpu_var(xen_clock_events).evt); + clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt)); } void xen_timer_resume(void) -- GitLab From e16321709c8270f9803bbfdb51e5e02235078c7f Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:41 -0500 Subject: [PATCH 0267/1868] uv: Replace __get_cpu_var Use __this_cpu_read instead. Cc: Hedi Berriche Cc: Mike Travis Cc: Dimitri Sivanich Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/uv/uv_hub.h | 10 ++++---- arch/x86/platform/uv/uv_nmi.c | 40 ++++++++++++++++---------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index bb84cfd5a1a1..a00ad8f2a657 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -601,16 +601,16 @@ struct uv_hub_nmi_s { struct uv_cpu_nmi_s { struct uv_hub_nmi_s *hub; - atomic_t state; - atomic_t pinging; + int state; + int pinging; int queries; int pings; }; -DECLARE_PER_CPU(struct uv_cpu_nmi_s, __uv_cpu_nmi); -#define uv_cpu_nmi (__get_cpu_var(__uv_cpu_nmi)) +DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi); + #define uv_hub_nmi (uv_cpu_nmi.hub) -#define uv_cpu_nmi_per(cpu) (per_cpu(__uv_cpu_nmi, cpu)) +#define uv_cpu_nmi_per(cpu) (per_cpu(uv_cpu_nmi, cpu)) #define uv_hub_nmi_per(cpu) (uv_cpu_nmi_per(cpu).hub) /* 
uv_cpu_nmi_states */ diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index c89c93320c12..c6b146e67116 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -63,8 +63,8 @@ static struct uv_hub_nmi_s **uv_hub_nmi_list; -DEFINE_PER_CPU(struct uv_cpu_nmi_s, __uv_cpu_nmi); -EXPORT_PER_CPU_SYMBOL_GPL(__uv_cpu_nmi); +DEFINE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi); +EXPORT_PER_CPU_SYMBOL_GPL(uv_cpu_nmi); static unsigned long nmi_mmr; static unsigned long nmi_mmr_clear; @@ -215,7 +215,7 @@ static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi) int nmi = 0; local64_inc(&uv_nmi_count); - uv_cpu_nmi.queries++; + this_cpu_inc(uv_cpu_nmi.queries); do { nmi = atomic_read(&hub_nmi->in_nmi); @@ -293,7 +293,7 @@ static void uv_nmi_nr_cpus_ping(void) int cpu; for_each_cpu(cpu, uv_nmi_cpu_mask) - atomic_set(&uv_cpu_nmi_per(cpu).pinging, 1); + uv_cpu_nmi_per(cpu).pinging = 1; apic->send_IPI_mask(uv_nmi_cpu_mask, APIC_DM_NMI); } @@ -304,8 +304,8 @@ static void uv_nmi_cleanup_mask(void) int cpu; for_each_cpu(cpu, uv_nmi_cpu_mask) { - atomic_set(&uv_cpu_nmi_per(cpu).pinging, 0); - atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_OUT); + uv_cpu_nmi_per(cpu).pinging = 0; + uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_OUT; cpumask_clear_cpu(cpu, uv_nmi_cpu_mask); } } @@ -328,7 +328,7 @@ static int uv_nmi_wait_cpus(int first) int loop_delay = uv_nmi_loop_delay; for_each_cpu(j, uv_nmi_cpu_mask) { - if (atomic_read(&uv_cpu_nmi_per(j).state)) { + if (uv_cpu_nmi_per(j).state) { cpumask_clear_cpu(j, uv_nmi_cpu_mask); if (++k >= n) break; @@ -359,7 +359,7 @@ static int uv_nmi_wait_cpus(int first) static void uv_nmi_wait(int master) { /* indicate this cpu is in */ - atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_IN); + this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_IN); /* if not the first cpu in (the master), then we are a slave cpu */ if (!master) @@ -419,7 +419,7 @@ static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs) "UV:%sNMI process 
trace for CPU %d\n", dots, cpu); show_regs(regs); } - atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE); + this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE); } /* Trigger a slave cpu to dump it's state */ @@ -427,20 +427,20 @@ static void uv_nmi_trigger_dump(int cpu) { int retry = uv_nmi_trigger_delay; - if (atomic_read(&uv_cpu_nmi_per(cpu).state) != UV_NMI_STATE_IN) + if (uv_cpu_nmi_per(cpu).state != UV_NMI_STATE_IN) return; - atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_DUMP); + uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP; do { cpu_relax(); udelay(10); - if (atomic_read(&uv_cpu_nmi_per(cpu).state) + if (uv_cpu_nmi_per(cpu).state != UV_NMI_STATE_DUMP) return; } while (--retry > 0); pr_crit("UV: CPU %d stuck in process dump function\n", cpu); - atomic_set(&uv_cpu_nmi_per(cpu).state, UV_NMI_STATE_DUMP_DONE); + uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP_DONE; } /* Wait until all cpus ready to exit */ @@ -488,7 +488,7 @@ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master) } else { while (!atomic_read(&uv_nmi_slave_continue)) cpu_relax(); - while (atomic_read(&uv_cpu_nmi.state) != UV_NMI_STATE_DUMP) + while (this_cpu_read(uv_cpu_nmi.state) != UV_NMI_STATE_DUMP) cpu_relax(); uv_nmi_dump_state_cpu(cpu, regs); } @@ -615,7 +615,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) local_irq_save(flags); /* If not a UV System NMI, ignore */ - if (!atomic_read(&uv_cpu_nmi.pinging) && !uv_check_nmi(hub_nmi)) { + if (!this_cpu_read(uv_cpu_nmi.pinging) && !uv_check_nmi(hub_nmi)) { local_irq_restore(flags); return NMI_DONE; } @@ -639,7 +639,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) uv_call_kgdb_kdb(cpu, regs, master); /* Clear per_cpu "in nmi" flag */ - atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT); + this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_OUT); /* Clear MMR NMI flag on each hub */ uv_clear_nmi(cpu); @@ -666,16 +666,16 @@ static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs 
*regs) { int ret; - uv_cpu_nmi.queries++; - if (!atomic_read(&uv_cpu_nmi.pinging)) { + this_cpu_inc(uv_cpu_nmi.queries); + if (!this_cpu_read(uv_cpu_nmi.pinging)) { local64_inc(&uv_nmi_ping_misses); return NMI_DONE; } - uv_cpu_nmi.pings++; + this_cpu_inc(uv_cpu_nmi.pings); local64_inc(&uv_nmi_ping_count); ret = uv_handle_nmi(reason, regs); - atomic_set(&uv_cpu_nmi.pinging, 0); + this_cpu_write(uv_cpu_nmi.pinging, 0); return ret; } -- GitLab From 06b96c8beb940619ddc818e2e00915fbc524f807 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:42 -0500 Subject: [PATCH 0268/1868] arm: Replace __this_cpu_ptr with raw_cpu_ptr __this_cpu_ptr is being phased out. So replace with raw_cpu_ptr. Cc: Russell King Cc: Catalin Marinas Acked-by: Will Deacon Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/arm/kernel/smp_twd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index dfc32130bc44..93090213c71c 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -92,7 +92,7 @@ static int twd_timer_ack(void) static void twd_timer_stop(void) { - struct clock_event_device *clk = __this_cpu_ptr(twd_evt); + struct clock_event_device *clk = raw_cpu_ptr(twd_evt); twd_set_mode(CLOCK_EVT_MODE_UNUSED, clk); disable_percpu_irq(clk->irq); @@ -108,7 +108,7 @@ static void twd_update_frequency(void *new_rate) { twd_timer_rate = *((unsigned long *) new_rate); - clockevents_update_freq(__this_cpu_ptr(twd_evt), twd_timer_rate); + clockevents_update_freq(raw_cpu_ptr(twd_evt), twd_timer_rate); } static int twd_rate_change(struct notifier_block *nb, @@ -134,7 +134,7 @@ static struct notifier_block twd_clk_nb = { static int twd_clk_init(void) { - if (twd_evt && __this_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) + if (twd_evt && raw_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) return clk_notifier_register(twd_clk, &twd_clk_nb); return 0; @@ -153,7 +153,7 @@ static void 
twd_update_frequency(void *data) { twd_timer_rate = clk_get_rate(twd_clk); - clockevents_update_freq(__this_cpu_ptr(twd_evt), twd_timer_rate); + clockevents_update_freq(raw_cpu_ptr(twd_evt), twd_timer_rate); } static int twd_cpufreq_transition(struct notifier_block *nb, @@ -179,7 +179,7 @@ static struct notifier_block twd_cpufreq_nb = { static int twd_cpufreq_init(void) { - if (twd_evt && __this_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) + if (twd_evt && raw_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) return cpufreq_register_notifier(&twd_cpufreq_nb, CPUFREQ_TRANSITION_NOTIFIER); @@ -269,7 +269,7 @@ static void twd_get_clock(struct device_node *np) */ static void twd_timer_setup(void) { - struct clock_event_device *clk = __this_cpu_ptr(twd_evt); + struct clock_event_device *clk = raw_cpu_ptr(twd_evt); int cpu = smp_processor_id(); /* -- GitLab From d1cd39ad583e36f3a945ba043a0a2bfae83fe859 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:43 -0500 Subject: [PATCH 0269/1868] MIPS: Replace __get_cpu_var uses in FPU emulator. The use of __this_cpu_inc() requires a fundamental integer type, so change the type of all the counters to unsigned long, which is the same width they were before, but not wrapped in local_t. 
Signed-off-by: David Daney Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/mips/include/asm/fpu_emulator.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h index 0195745b4b1b..3ee347713307 100644 --- a/arch/mips/include/asm/fpu_emulator.h +++ b/arch/mips/include/asm/fpu_emulator.h @@ -33,17 +33,17 @@ #ifdef CONFIG_DEBUG_FS struct mips_fpu_emulator_stats { - local_t emulated; - local_t loads; - local_t stores; - local_t cp1ops; - local_t cp1xops; - local_t errors; - local_t ieee754_inexact; - local_t ieee754_underflow; - local_t ieee754_overflow; - local_t ieee754_zerodiv; - local_t ieee754_invalidop; + unsigned long emulated; + unsigned long loads; + unsigned long stores; + unsigned long cp1ops; + unsigned long cp1xops; + unsigned long errors; + unsigned long ieee754_inexact; + unsigned long ieee754_underflow; + unsigned long ieee754_overflow; + unsigned long ieee754_zerodiv; + unsigned long ieee754_invalidop; }; DECLARE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); @@ -51,7 +51,7 @@ DECLARE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); #define MIPS_FPU_EMU_INC_STATS(M) \ do { \ preempt_disable(); \ - __local_inc(&__get_cpu_var(fpuemustats).M); \ + __this_cpu_inc(fpuemustats.M); \ preempt_enable(); \ } while (0) -- GitLab From 35898716b4d3382791d219be317faace580b6a41 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:44 -0500 Subject: [PATCH 0270/1868] mips: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. 
__get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. 
Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Cc: Ralf Baechle Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/mips/cavium-octeon/octeon-irq.c | 30 ++++++++++++++-------------- arch/mips/kernel/kprobes.c | 6 +++--- arch/mips/kernel/perf_event_mipsxx.c | 14 ++++++------- arch/mips/kernel/smp-bmips.c | 2 +- arch/mips/loongson/loongson-3/smp.c | 6 +++--- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 1b82ac6921e0..741734049675 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -264,13 +264,13 @@ static void octeon_irq_ciu_enable_local(struct irq_data *data) unsigned long *pen; unsigned long flags; union octeon_ciu_chip_data cd; - raw_spinlock_t *lock = &__get_cpu_var(octeon_irq_ciu_spinlock); + raw_spinlock_t *lock = this_cpu_ptr(&octeon_irq_ciu_spinlock); cd.p = irq_data_get_irq_chip_data(data); raw_spin_lock_irqsave(lock, flags); if (cd.s.line == 0) { - pen = &__get_cpu_var(octeon_irq_ciu0_en_mirror); + pen = this_cpu_ptr(&octeon_irq_ciu0_en_mirror); __set_bit(cd.s.bit, pen); /* * Must be visible to octeon_irq_ip{2,3}_ciu() before @@ -279,7 +279,7 @@ static void octeon_irq_ciu_enable_local(struct irq_data *data) wmb(); cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2), *pen); } else { - pen = &__get_cpu_var(octeon_irq_ciu1_en_mirror); + pen = this_cpu_ptr(&octeon_irq_ciu1_en_mirror); __set_bit(cd.s.bit, pen); /* * Must be visible to octeon_irq_ip{2,3}_ciu() before @@ -296,13 +296,13 @@ static void octeon_irq_ciu_disable_local(struct irq_data *data) unsigned long *pen; unsigned long flags; union octeon_ciu_chip_data cd; - raw_spinlock_t *lock = &__get_cpu_var(octeon_irq_ciu_spinlock); + raw_spinlock_t *lock = this_cpu_ptr(&octeon_irq_ciu_spinlock); cd.p = irq_data_get_irq_chip_data(data); raw_spin_lock_irqsave(lock, flags); if 
(cd.s.line == 0) { - pen = &__get_cpu_var(octeon_irq_ciu0_en_mirror); + pen = this_cpu_ptr(&octeon_irq_ciu0_en_mirror); __clear_bit(cd.s.bit, pen); /* * Must be visible to octeon_irq_ip{2,3}_ciu() before @@ -311,7 +311,7 @@ static void octeon_irq_ciu_disable_local(struct irq_data *data) wmb(); cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2), *pen); } else { - pen = &__get_cpu_var(octeon_irq_ciu1_en_mirror); + pen = this_cpu_ptr(&octeon_irq_ciu1_en_mirror); __clear_bit(cd.s.bit, pen); /* * Must be visible to octeon_irq_ip{2,3}_ciu() before @@ -431,11 +431,11 @@ static void octeon_irq_ciu_enable_local_v2(struct irq_data *data) if (cd.s.line == 0) { int index = cvmx_get_core_num() * 2; - set_bit(cd.s.bit, &__get_cpu_var(octeon_irq_ciu0_en_mirror)); + set_bit(cd.s.bit, this_cpu_ptr(&octeon_irq_ciu0_en_mirror)); cvmx_write_csr(CVMX_CIU_INTX_EN0_W1S(index), mask); } else { int index = cvmx_get_core_num() * 2 + 1; - set_bit(cd.s.bit, &__get_cpu_var(octeon_irq_ciu1_en_mirror)); + set_bit(cd.s.bit, this_cpu_ptr(&octeon_irq_ciu1_en_mirror)); cvmx_write_csr(CVMX_CIU_INTX_EN1_W1S(index), mask); } } @@ -450,11 +450,11 @@ static void octeon_irq_ciu_disable_local_v2(struct irq_data *data) if (cd.s.line == 0) { int index = cvmx_get_core_num() * 2; - clear_bit(cd.s.bit, &__get_cpu_var(octeon_irq_ciu0_en_mirror)); + clear_bit(cd.s.bit, this_cpu_ptr(&octeon_irq_ciu0_en_mirror)); cvmx_write_csr(CVMX_CIU_INTX_EN0_W1C(index), mask); } else { int index = cvmx_get_core_num() * 2 + 1; - clear_bit(cd.s.bit, &__get_cpu_var(octeon_irq_ciu1_en_mirror)); + clear_bit(cd.s.bit, this_cpu_ptr(&octeon_irq_ciu1_en_mirror)); cvmx_write_csr(CVMX_CIU_INTX_EN1_W1C(index), mask); } } @@ -1063,7 +1063,7 @@ static void octeon_irq_ip2_ciu(void) const unsigned long core_id = cvmx_get_core_num(); u64 ciu_sum = cvmx_read_csr(CVMX_CIU_INTX_SUM0(core_id * 2)); - ciu_sum &= __get_cpu_var(octeon_irq_ciu0_en_mirror); + ciu_sum &= __this_cpu_read(octeon_irq_ciu0_en_mirror); if (likely(ciu_sum)) { int bit = 
fls64(ciu_sum) - 1; int irq = octeon_irq_ciu_to_irq[0][bit]; @@ -1080,7 +1080,7 @@ static void octeon_irq_ip3_ciu(void) { u64 ciu_sum = cvmx_read_csr(CVMX_CIU_INT_SUM1); - ciu_sum &= __get_cpu_var(octeon_irq_ciu1_en_mirror); + ciu_sum &= __this_cpu_read(octeon_irq_ciu1_en_mirror); if (likely(ciu_sum)) { int bit = fls64(ciu_sum) - 1; int irq = octeon_irq_ciu_to_irq[1][bit]; @@ -1129,10 +1129,10 @@ static void octeon_irq_init_ciu_percpu(void) int coreid = cvmx_get_core_num(); - __get_cpu_var(octeon_irq_ciu0_en_mirror) = 0; - __get_cpu_var(octeon_irq_ciu1_en_mirror) = 0; + __this_cpu_write(octeon_irq_ciu0_en_mirror, 0); + __this_cpu_write(octeon_irq_ciu1_en_mirror, 0); wmb(); - raw_spin_lock_init(&__get_cpu_var(octeon_irq_ciu_spinlock)); + raw_spin_lock_init(this_cpu_ptr(&octeon_irq_ciu_spinlock)); /* * Disable All CIU Interrupts. The ones we need will be * enabled later. Read the SUM register so we know the write diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 1f8187ab0997..212f46f2014e 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -224,7 +224,7 @@ static void save_previous_kprobe(struct kprobe_ctlblk *kcb) static void restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_old_SR = kcb->prev_kprobe.old_SR; kcb->kprobe_saved_SR = kcb->prev_kprobe.saved_SR; @@ -234,7 +234,7 @@ static void restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_SR = kcb->kprobe_old_SR = (regs->cp0_status & ST0_IE); kcb->kprobe_saved_epc = regs->cp0_epc; } @@ -385,7 +385,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ret = 1; goto no_kprobe; } - p = 
__get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) goto ss_probe; } diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 14bf74b0f51c..abb209fa28c6 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -340,7 +340,7 @@ static int mipsxx_pmu_alloc_counter(struct cpu_hw_events *cpuc, static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); WARN_ON(idx < 0 || idx >= mipspmu.num_counters); @@ -360,7 +360,7 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) static void mipsxx_pmu_disable_event(int idx) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); unsigned long flags; WARN_ON(idx < 0 || idx >= mipspmu.num_counters); @@ -460,7 +460,7 @@ static void mipspmu_stop(struct perf_event *event, int flags) static int mipspmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx; int err = 0; @@ -496,7 +496,7 @@ static int mipspmu_add(struct perf_event *event, int flags) static void mipspmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -1275,7 +1275,7 @@ static int __hw_perf_event_init(struct perf_event *event) static void pause_local_counters(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int ctr = mipspmu.num_counters; unsigned long flags; @@ -1291,7 +1291,7 @@ static 
void pause_local_counters(void) static void resume_local_counters(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int ctr = mipspmu.num_counters; do { @@ -1302,7 +1302,7 @@ static void resume_local_counters(void) static int mipsxx_pmu_handle_shared_irq(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_sample_data data; unsigned int counters = mipspmu.num_counters; u64 counter; diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index df9e2bd9b2c2..06bb5ed6d80a 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -346,7 +346,7 @@ static irqreturn_t bmips43xx_ipi_interrupt(int irq, void *dev_id) int action, cpu = irq - IPI0_IRQ; spin_lock_irqsave(&ipi_lock, flags); - action = __get_cpu_var(ipi_action_mask); + action = __this_cpu_read(ipi_action_mask); per_cpu(ipi_action_mask, cpu) = 0; clear_c0_cause(cpu ? 
C_SW1 : C_SW0); spin_unlock_irqrestore(&ipi_lock, flags); diff --git a/arch/mips/loongson/loongson-3/smp.c b/arch/mips/loongson/loongson-3/smp.c index 74e827b4ec8f..d8c63af6c7cc 100644 --- a/arch/mips/loongson/loongson-3/smp.c +++ b/arch/mips/loongson/loongson-3/smp.c @@ -299,16 +299,16 @@ static void loongson3_init_secondary(void) per_cpu(cpu_state, cpu) = CPU_ONLINE; i = 0; - __get_cpu_var(core0_c0count) = 0; + __this_cpu_write(core0_c0count, 0); loongson3_send_ipi_single(0, SMP_ASK_C0COUNT); - while (!__get_cpu_var(core0_c0count)) { + while (!__this_cpu_read(core0_c0count)) { i++; cpu_relax(); } if (i > MAX_LOOPS) i = MAX_LOOPS; - initcount = __get_cpu_var(core0_c0count) + i; + initcount = __this_cpu_read(core0_c0count) + i; write_c0_count(initcount); } -- GitLab From eb7e7d766326f70859046bfdb6277068c2461fe2 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:45 -0500 Subject: [PATCH 0271/1868] s390: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. 
This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to this_cpu_write(y, x); 6. 
Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to this_cpu_inc(y) Cc: Martin Schwidefsky CC: linux390@de.ibm.com Acked-by: Heiko Carstens Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/s390/include/asm/cputime.h | 2 +- arch/s390/include/asm/irq.h | 2 +- arch/s390/include/asm/percpu.h | 16 ++++++++-------- arch/s390/kernel/irq.c | 2 +- arch/s390/kernel/kprobes.c | 8 ++++---- arch/s390/kernel/nmi.c | 10 +++++++--- arch/s390/kernel/perf_cpum_cf.c | 22 +++++++++++----------- arch/s390/kernel/perf_cpum_sf.c | 16 ++++++++-------- arch/s390/kernel/processor.c | 4 ++-- arch/s390/kernel/time.c | 6 +++--- arch/s390/kernel/vtime.c | 2 +- arch/s390/oprofile/hwsampler.c | 2 +- 12 files changed, 48 insertions(+), 44 deletions(-) diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index f65bd3634519..692d310dc32d 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -184,7 +184,7 @@ cputime64_t s390_get_idle_time(int cpu); static inline int s390_nohz_delay(int cpu) { - return __get_cpu_var(s390_idle).nohz_delay != 0; + return __this_cpu_read(s390_idle.nohz_delay) != 0; } #define arch_needs_cpu(cpu) s390_nohz_delay(cpu) diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index c4dd400a2791..713d325afbfe 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -81,7 +81,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); static __always_inline void inc_irq_stat(enum interruption_class irq) { - __get_cpu_var(irq_stat).irqs[irq]++; + __this_cpu_inc(irq_stat.irqs[irq]); } struct ext_code { diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index fa91e0097458..933355e0d091 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -31,7 +31,7 @@ pcp_op_T__ old__, new__, prev__; \ pcp_op_T__ *ptr__; \ preempt_disable(); \ - ptr__ = __this_cpu_ptr(&(pcp)); \ 
+ ptr__ = raw_cpu_ptr(&(pcp)); \ prev__ = *ptr__; \ do { \ old__ = prev__; \ @@ -70,7 +70,7 @@ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ preempt_disable(); \ - ptr__ = __this_cpu_ptr(&(pcp)); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ if (__builtin_constant_p(val__) && \ ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \ asm volatile( \ @@ -97,7 +97,7 @@ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ preempt_disable(); \ - ptr__ = __this_cpu_ptr(&(pcp)); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ asm volatile( \ op " %[old__],%[val__],%[ptr__]\n" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ @@ -116,7 +116,7 @@ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ preempt_disable(); \ - ptr__ = __this_cpu_ptr(&(pcp)); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ asm volatile( \ op " %[old__],%[val__],%[ptr__]\n" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ @@ -138,7 +138,7 @@ pcp_op_T__ ret__; \ pcp_op_T__ *ptr__; \ preempt_disable(); \ - ptr__ = __this_cpu_ptr(&(pcp)); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ ret__ = cmpxchg(ptr__, oval, nval); \ preempt_enable(); \ ret__; \ @@ -154,7 +154,7 @@ typeof(pcp) *ptr__; \ typeof(pcp) ret__; \ preempt_disable(); \ - ptr__ = __this_cpu_ptr(&(pcp)); \ + ptr__ = raw_cpu_ptr(&(pcp)); \ ret__ = xchg(ptr__, nval); \ preempt_enable(); \ ret__; \ @@ -173,8 +173,8 @@ typeof(pcp2) *p2__; \ int ret__; \ preempt_disable(); \ - p1__ = __this_cpu_ptr(&(pcp1)); \ - p2__ = __this_cpu_ptr(&(pcp2)); \ + p1__ = raw_cpu_ptr(&(pcp1)); \ + p2__ = raw_cpu_ptr(&(pcp2)); \ ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \ preempt_enable(); \ ret__; \ diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 8eb82443cfbd..891c183211ce 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -258,7 +258,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy) ext_code = *(struct ext_code *) &regs->int_code; if (ext_code.code != EXT_IRQ_CLK_COMP) - __get_cpu_var(s390_idle).nohz_delay = 1; +
__this_cpu_write(s390_idle.nohz_delay, 1); index = ext_hash(ext_code.code); rcu_read_lock(); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index bc71a7b95af5..131ed342ed10 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -366,9 +366,9 @@ static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb, */ static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p) { - kcb->prev_kprobe.kp = __get_cpu_var(current_kprobe); + kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe); kcb->prev_kprobe.status = kcb->kprobe_status; - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); } /* @@ -378,7 +378,7 @@ static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p) */ static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; } @@ -459,7 +459,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn); return 1; } else if (kprobe_running()) { - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { /* * Continuation after the jprobe completed and diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 210e1285f75a..d75c42f4147d 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -53,8 +53,12 @@ void s390_handle_mcck(void) */ local_irq_save(flags); local_mcck_disable(); - mcck = __get_cpu_var(cpu_mcck); - memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct)); + /* + * Ummm... Does this make sense at all? Copying the percpu struct + * and then zapping it one statement later? 
+ */ + memcpy(&mcck, this_cpu_ptr(&cpu_mcck), sizeof(mcck)); + memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(struct mcck_struct)); clear_cpu_flag(CIF_MCCK_PENDING); local_mcck_enable(); local_irq_restore(flags); @@ -253,7 +257,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) nmi_enter(); inc_irq_stat(NMI_NMI); mci = (struct mci *) &S390_lowcore.mcck_interruption_code; - mcck = &__get_cpu_var(cpu_mcck); + mcck = this_cpu_ptr(&cpu_mcck); umode = user_mode(regs); if (mci->sd) { diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index d3194de7ae1e..56fdad479115 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -173,7 +173,7 @@ static int validate_ctr_auth(const struct hw_perf_event *hwc) */ static void cpumf_pmu_enable(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); int err; if (cpuhw->flags & PMU_F_ENABLED) @@ -196,7 +196,7 @@ static void cpumf_pmu_enable(struct pmu *pmu) */ static void cpumf_pmu_disable(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); int err; u64 inactive; @@ -230,7 +230,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code, return; inc_irq_stat(IRQEXT_CMC); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); /* Measurement alerts are shared and might happen when the PMU * is not reserved. Ignore these alerts in this case.
*/ @@ -250,7 +250,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code, #define PMC_RELEASE 1 static void setup_pmc_cpu(void *flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); switch (*((int *) flags)) { case PMC_INIT: @@ -475,7 +475,7 @@ static void cpumf_pmu_read(struct perf_event *event) static void cpumf_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) @@ -506,7 +506,7 @@ static void cpumf_pmu_start(struct perf_event *event, int flags) static void cpumf_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; if (!(hwc->state & PERF_HES_STOPPED)) { @@ -527,7 +527,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags) static int cpumf_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); /* Check authorization for the counter set to which this * counter belongs. 
@@ -551,7 +551,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) static void cpumf_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); cpumf_pmu_stop(event, PERF_EF_UPDATE); @@ -575,7 +575,7 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) */ static void cpumf_pmu_start_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); perf_pmu_disable(pmu); cpuhw->flags |= PERF_EVENT_TXN; @@ -589,7 +589,7 @@ static void cpumf_pmu_start_txn(struct pmu *pmu) */ static void cpumf_pmu_cancel_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); WARN_ON(cpuhw->tx_state != cpuhw->state); @@ -604,7 +604,7 @@ static void cpumf_pmu_cancel_txn(struct pmu *pmu) */ static int cpumf_pmu_commit_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); u64 state; /* check if the updated state can be scheduled */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index ea0c7b2ef030..08e761318c17 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -562,7 +562,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex); static void setup_pmc_cpu(void *flags) { int err; - struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf); err = 0; switch (*((int *) flags)) { @@ -849,7 +849,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event) static void cpumsf_pmu_enable(struct pmu *pmu) { - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); struct hw_perf_event *hwc; int err; @@ -898,7 +898,7 @@ static void 
cpumsf_pmu_enable(struct pmu *pmu) static void cpumsf_pmu_disable(struct pmu *pmu) { - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); struct hws_lsctl_request_block inactive; struct hws_qsi_info_block si; int err; @@ -1306,7 +1306,7 @@ static void cpumsf_pmu_read(struct perf_event *event) */ static void cpumsf_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) return; @@ -1327,7 +1327,7 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags) */ static void cpumsf_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); if (event->hw.state & PERF_HES_STOPPED) return; @@ -1346,7 +1346,7 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags) static int cpumsf_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); int err; if (cpuhw->flags & PMU_F_IN_USE) @@ -1397,7 +1397,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) static void cpumsf_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); perf_pmu_disable(event->pmu); cpumsf_pmu_stop(event, PERF_EF_UPDATE); @@ -1470,7 +1470,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code, if (!(alert & CPU_MF_INT_SF_MASK)) return; inc_irq_stat(IRQEXT_CMS); - cpuhw = &__get_cpu_var(cpu_hw_sf); + cpuhw = this_cpu_ptr(&cpu_hw_sf); /* Measurement alerts are shared and might happen when the PMU * is not reserved. Ignore these alerts in this case. 
*/ diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 24612029f450..f0305b1189aa 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -23,8 +23,8 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id); */ void cpu_init(void) { - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); - struct cpuid *id = &__get_cpu_var(cpu_id); + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + struct cpuid *id = this_cpu_ptr(&cpu_id); get_cpu_id(id); atomic_inc(&init_mm.mm_count); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 4cef607f3711..4e5a6d881c62 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -92,7 +92,7 @@ void clock_comparator_work(void) struct clock_event_device *cd; S390_lowcore.clock_comparator = -1ULL; - cd = &__get_cpu_var(comparators); + cd = this_cpu_ptr(&comparators); cd->event_handler(cd); } @@ -360,7 +360,7 @@ EXPORT_SYMBOL(get_sync_clock); */ static void disable_sync_clock(void *dummy) { - atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word); + atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word); /* * Clear the in-sync bit 2^31. All get_sync_clock calls will * fail until the sync bit is turned back on. 
In addition @@ -377,7 +377,7 @@ static void disable_sync_clock(void *dummy) */ static void enable_sync_clock(void) { - atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word); + atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word); atomic_set_mask(0x80000000, sw_ptr); } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 8c34363d6f1e..f400745dedc0 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(vtime_account_system); void __kprobes vtime_stop_cpu(void) { - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); unsigned long long idle_time; unsigned long psw_mask; diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index e53c6f268807..ff9b4eb34589 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -178,7 +178,7 @@ static int smp_ctl_qsi(int cpu) static void hws_ext_handler(struct ext_code ext_code, unsigned int param32, unsigned long param64) { - struct hws_cpu_buffer *cb = &__get_cpu_var(sampler_cpu_buffer); + struct hws_cpu_buffer *cb = this_cpu_ptr(&sampler_cpu_buffer); if (!(param32 & CPU_MF_INT_SF_MASK)) return; -- GitLab From 0bf7fcf155160fd483af7ffdc50efd4be96f1c96 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:46 -0500 Subject: [PATCH 0272/1868] s390: cio driver &__get_cpu_var replacements Use this_cpu_ptr() instead of &__get_cpu_var() Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- drivers/s390/cio/ccwreq.c | 2 +- drivers/s390/cio/chsc_sch.c | 2 +- drivers/s390/cio/cio.c | 6 +++--- drivers/s390/cio/device_fsm.c | 4 ++-- drivers/s390/cio/eadm_sch.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/s390/cio/ccwreq.c b/drivers/s390/cio/ccwreq.c index 07676c22d514..79f59915f71b 100644 --- a/drivers/s390/cio/ccwreq.c +++ b/drivers/s390/cio/ccwreq.c @@ -252,7 +252,7 @@ static void ccwreq_log_status(struct 
ccw_device *cdev, enum io_status status) */ void ccw_request_handler(struct ccw_device *cdev) { - struct irb *irb = &__get_cpu_var(cio_irb); + struct irb *irb = this_cpu_ptr(&cio_irb); struct ccw_request *req = &cdev->private->req; enum io_status status; int rc = -EOPNOTSUPP; diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index 3d22d2a4ce14..213159dec89e 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -58,7 +58,7 @@ static void chsc_subchannel_irq(struct subchannel *sch) { struct chsc_private *private = dev_get_drvdata(&sch->dev); struct chsc_request *request = private->request; - struct irb *irb = &__get_cpu_var(cio_irb); + struct irb *irb = this_cpu_ptr(&cio_irb); CHSC_LOG(4, "irb"); CHSC_LOG_HEX(4, irb, sizeof(*irb)); diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 2905d8b0ec95..dacdaaa44919 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -563,7 +563,7 @@ static irqreturn_t do_cio_interrupt(int irq, void *dummy) __this_cpu_write(s390_idle.nohz_delay, 1); tpi_info = (struct tpi_info *) &get_irq_regs()->int_code; - irb = &__get_cpu_var(cio_irb); + irb = this_cpu_ptr(&cio_irb); sch = (struct subchannel *)(unsigned long) tpi_info->intparm; if (!sch) { /* Clear pending interrupt condition. */ @@ -613,7 +613,7 @@ void cio_tsch(struct subchannel *sch) struct irb *irb; int irq_context; - irb = &__get_cpu_var(cio_irb); + irb = this_cpu_ptr(&cio_irb); /* Store interrupt response block to lowcore. */ if (tsch(sch->schid, irb) != 0) /* Not status pending or not operational. 
*/ @@ -751,7 +751,7 @@ __clear_io_subchannel_easy(struct subchannel_id schid) struct tpi_info ti; if (tpi(&ti)) { - tsch(ti.schid, &__get_cpu_var(cio_irb)); + tsch(ti.schid, this_cpu_ptr(&cio_irb)); if (schid_equal(&ti.schid, &schid)) return 0; } diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 0bc902b3cd84..83da53c8e54c 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -739,7 +739,7 @@ ccw_device_irq(struct ccw_device *cdev, enum dev_event dev_event) struct irb *irb; int is_cmd; - irb = &__get_cpu_var(cio_irb); + irb = this_cpu_ptr(&cio_irb); is_cmd = !scsw_is_tm(&irb->scsw); /* Check for unsolicited interrupt. */ if (!scsw_is_solicited(&irb->scsw)) { @@ -805,7 +805,7 @@ ccw_device_w4sense(struct ccw_device *cdev, enum dev_event dev_event) { struct irb *irb; - irb = &__get_cpu_var(cio_irb); + irb = this_cpu_ptr(&cio_irb); /* Check for unsolicited interrupt. */ if (scsw_stctl(&irb->scsw) == (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS)) { diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c index c4f7bf3e24c2..37f0834300ea 100644 --- a/drivers/s390/cio/eadm_sch.c +++ b/drivers/s390/cio/eadm_sch.c @@ -134,7 +134,7 @@ static void eadm_subchannel_irq(struct subchannel *sch) { struct eadm_private *private = get_eadm_private(sch); struct eadm_scsw *scsw = &sch->schib.scsw.eadm; - struct irb *irb = &__get_cpu_var(cio_irb); + struct irb *irb = this_cpu_ptr(&cio_irb); int error = 0; EADM_LOG(6, "irq"); -- GitLab From 6065a244a039a23d933e4b803a4e052da2849208 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:47 -0500 Subject: [PATCH 0273/1868] ia64: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. 
Other use cases are for storing and retrieving data from the current processor's percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as: #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and fewer registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non-x86 arches as well in order to optimize per cpu access by e.g. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processor's instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. 
Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Cc: Tony Luck Cc: Fenghua Yu Cc: linux-ia64@vger.kernel.org Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/ia64/include/asm/hw_irq.h | 2 +- arch/ia64/include/asm/sn/arch.h | 4 ++-- arch/ia64/include/asm/sn/nodepda.h | 2 +- arch/ia64/include/asm/switch_to.h | 2 +- arch/ia64/include/asm/uv/uv_hub.h | 2 +- arch/ia64/kernel/irq.c | 2 +- arch/ia64/kernel/irq_ia64.c | 4 ++-- arch/ia64/kernel/kprobes.c | 6 +++--- arch/ia64/kernel/mca.c | 16 ++++++++-------- arch/ia64/kernel/process.c | 6 +++--- arch/ia64/kernel/traps.c | 2 +- arch/ia64/sn/kernel/sn2/sn2_smp.c | 28 ++++++++++++++-------------- 12 files changed, 38 insertions(+), 38 deletions(-) diff --git a/arch/ia64/include/asm/hw_irq.h b/arch/ia64/include/asm/hw_irq.h index 029bab36cd91..668786e84af8 100644 --- a/arch/ia64/include/asm/hw_irq.h +++ b/arch/ia64/include/asm/hw_irq.h @@ -159,7 +159,7 @@ static inline ia64_vector __ia64_irq_to_vector(int irq) static inline unsigned int __ia64_local_vector_to_irq (ia64_vector vec) { - return __get_cpu_var(vector_irq)[vec]; + return __this_cpu_read(vector_irq[vec]); } #endif diff --git a/arch/ia64/include/asm/sn/arch.h b/arch/ia64/include/asm/sn/arch.h index 7caa1f44cd95..31eb784866f8 100644 --- a/arch/ia64/include/asm/sn/arch.h +++ b/arch/ia64/include/asm/sn/arch.h @@ -57,7 +57,7 @@ struct sn_hub_info_s { u16 nasid_bitmask; }; DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); -#define sn_hub_info (&__get_cpu_var(__sn_hub_info)) +#define sn_hub_info this_cpu_ptr(&__sn_hub_info) #define is_shub2() (sn_hub_info->shub2) #define is_shub1() (sn_hub_info->shub2 == 0) @@ -72,7 +72,7 @@ DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); * cpu. 
*/ DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); -#define sn_cnodeid_to_nasid (&__get_cpu_var(__sn_cnodeid_to_nasid[0])) +#define sn_cnodeid_to_nasid this_cpu_ptr(&__sn_cnodeid_to_nasid[0]) extern u8 sn_partition_id; diff --git a/arch/ia64/include/asm/sn/nodepda.h b/arch/ia64/include/asm/sn/nodepda.h index ee118b901de4..7c8b4710f071 100644 --- a/arch/ia64/include/asm/sn/nodepda.h +++ b/arch/ia64/include/asm/sn/nodepda.h @@ -70,7 +70,7 @@ typedef struct nodepda_s nodepda_t; */ DECLARE_PER_CPU(struct nodepda_s *, __sn_nodepda); -#define sn_nodepda (__get_cpu_var(__sn_nodepda)) +#define sn_nodepda __this_cpu_read(__sn_nodepda) #define NODEPDA(cnodeid) (sn_nodepda->pernode_pdaindr[cnodeid]) /* diff --git a/arch/ia64/include/asm/switch_to.h b/arch/ia64/include/asm/switch_to.h index d38c7ea5eea5..e8f3585e7e7a 100644 --- a/arch/ia64/include/asm/switch_to.h +++ b/arch/ia64/include/asm/switch_to.h @@ -32,7 +32,7 @@ extern void ia64_load_extra (struct task_struct *task); #ifdef CONFIG_PERFMON DECLARE_PER_CPU(unsigned long, pfm_syst_info); -# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1) +# define PERFMON_IS_SYSWIDE() (__this_cpu_read(pfm_syst_info) & 0x1) #else # define PERFMON_IS_SYSWIDE() (0) #endif diff --git a/arch/ia64/include/asm/uv/uv_hub.h b/arch/ia64/include/asm/uv/uv_hub.h index 53e9dfacd073..2a88c7204e52 100644 --- a/arch/ia64/include/asm/uv/uv_hub.h +++ b/arch/ia64/include/asm/uv/uv_hub.h @@ -108,7 +108,7 @@ struct uv_hub_info_s { unsigned char n_val; }; DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); -#define uv_hub_info (&__get_cpu_var(__uv_hub_info)) +#define uv_hub_info this_cpu_ptr(&__uv_hub_info) #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) /* diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index f2c418281130..812a1e6b3179 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -42,7 +42,7 @@ ia64_vector __ia64_irq_to_vector(int irq) unsigned int __ia64_local_vector_to_irq 
(ia64_vector vec) { - return __get_cpu_var(vector_irq)[vec]; + return __this_cpu_read(vector_irq[vec]); } #endif diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 03ea78ed64a9..698d8fefde6c 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -330,7 +330,7 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) int irq; struct irq_desc *desc; struct irq_cfg *cfg; - irq = __get_cpu_var(vector_irq)[vector]; + irq = __this_cpu_read(vector_irq[vector]); if (irq < 0) continue; @@ -344,7 +344,7 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) goto unlock; spin_lock_irqsave(&vector_lock, flags); - __get_cpu_var(vector_irq)[vector] = -1; + __this_cpu_write(vector_irq[vector], -1); cpu_clear(me, vector_table[vector]); spin_unlock_irqrestore(&vector_lock, flags); cfg->move_cleanup_count--; diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 074fde49c9e6..c7c51445c3be 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -396,7 +396,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { unsigned int i; i = atomic_read(&kcb->prev_kprobe_index); - __get_cpu_var(current_kprobe) = kcb->prev_kprobe[i-1].kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe[i-1].kp); kcb->kprobe_status = kcb->prev_kprobe[i-1].status; atomic_sub(1, &kcb->prev_kprobe_index); } @@ -404,7 +404,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes set_current_kprobe(struct kprobe *p, struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); } static void kretprobe_trampoline(void) @@ -823,7 +823,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) /* * jprobe instrumented function just completed */ - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { 
goto ss_probe; } diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index db7b36bb068b..8bfd36af46f8 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1341,7 +1341,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, ia64_mlogbuf_finish(1); } - if (__get_cpu_var(ia64_mca_tr_reload)) { + if (__this_cpu_read(ia64_mca_tr_reload)) { mca_insert_tr(0x1); /*Reload dynamic itrs*/ mca_insert_tr(0x2); /*Reload dynamic itrs*/ } @@ -1868,14 +1868,14 @@ ia64_mca_cpu_init(void *cpu_data) "MCA", cpu); format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack), "INIT", cpu); - __get_cpu_var(ia64_mca_data) = __per_cpu_mca[cpu] = __pa(data); + __this_cpu_write(ia64_mca_data, (__per_cpu_mca[cpu] = __pa(data))); /* * Stash away a copy of the PTE needed to map the per-CPU page. * We may need it during MCA recovery. */ - __get_cpu_var(ia64_mca_per_cpu_pte) = - pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL)); + __this_cpu_write(ia64_mca_per_cpu_pte, + pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL))); /* * Also, stash away a copy of the PAL address and the PTE @@ -1884,10 +1884,10 @@ ia64_mca_cpu_init(void *cpu_data) pal_vaddr = efi_get_pal_addr(); if (!pal_vaddr) return; - __get_cpu_var(ia64_mca_pal_base) = - GRANULEROUNDDOWN((unsigned long) pal_vaddr); - __get_cpu_var(ia64_mca_pal_pte) = pte_val(mk_pte_phys(__pa(pal_vaddr), - PAGE_KERNEL)); + __this_cpu_write(ia64_mca_pal_base, + GRANULEROUNDDOWN((unsigned long) pal_vaddr)); + __this_cpu_write(ia64_mca_pal_pte, pte_val(mk_pte_phys(__pa(pal_vaddr), + PAGE_KERNEL))); } static void ia64_mca_cmc_vector_adjust(void *dummy) diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index deed6fa96bb0..b51514957620 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -215,7 +215,7 @@ static inline void play_dead(void) unsigned int this_cpu = smp_processor_id(); /* Ack it */ - __get_cpu_var(cpu_state) = CPU_DEAD; + __this_cpu_write(cpu_state, CPU_DEAD); 
max_xtp(); local_irq_disable(); @@ -273,7 +273,7 @@ ia64_save_extra (struct task_struct *task) if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) pfm_save_regs(task); - info = __get_cpu_var(pfm_syst_info); + info = __this_cpu_read(pfm_syst_info); if (info & PFM_CPUINFO_SYST_WIDE) pfm_syst_wide_update_task(task, info, 0); #endif @@ -293,7 +293,7 @@ ia64_load_extra (struct task_struct *task) if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) pfm_load_regs(task); - info = __get_cpu_var(pfm_syst_info); + info = __this_cpu_read(pfm_syst_info); if (info & PFM_CPUINFO_SYST_WIDE) pfm_syst_wide_update_task(task, info, 1); #endif diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index d3636e67a98e..6f7d4a4dcf24 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -299,7 +299,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) if (!(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) { unsigned long count, current_jiffies = jiffies; - struct fpu_swa_msg *cp = &__get_cpu_var(cpulast); + struct fpu_swa_msg *cp = this_cpu_ptr(&cpulast); if (unlikely(current_jiffies > cp->time)) cp->count = 0; diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 68c845411624..f9c8d9fc5939 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -134,8 +134,8 @@ sn2_ipi_flush_all_tlb(struct mm_struct *mm) itc = ia64_get_itc(); smp_flush_tlb_cpumask(*mm_cpumask(mm)); itc = ia64_get_itc() - itc; - __get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc; - __get_cpu_var(ptcstats).shub_ipi_flushes++; + __this_cpu_add(ptcstats.shub_ipi_flushes_itc_clocks, itc); + __this_cpu_inc(ptcstats.shub_ipi_flushes); } /** @@ -199,14 +199,14 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, start += (1UL << nbits); } while (start < end); ia64_srlz_i(); - __get_cpu_var(ptcstats).ptc_l++; + __this_cpu_inc(ptcstats.ptc_l); preempt_enable(); return; } if 
(atomic_read(&mm->mm_users) == 1 && mymm) { flush_tlb_mm(mm); - __get_cpu_var(ptcstats).change_rid++; + __this_cpu_inc(ptcstats.change_rid); preempt_enable(); return; } @@ -250,11 +250,11 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, spin_lock_irqsave(PTC_LOCK(shub1), flags); itc2 = ia64_get_itc(); - __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc; - __get_cpu_var(ptcstats).shub_ptc_flushes++; - __get_cpu_var(ptcstats).nodes_flushed += nix; + __this_cpu_add(ptcstats.lock_itc_clocks, itc2 - itc); + __this_cpu_inc(ptcstats.shub_ptc_flushes); + __this_cpu_add(ptcstats.nodes_flushed, nix); if (!mymm) - __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++; + __this_cpu_inc(ptcstats.shub_ptc_flushes_not_my_mm); if (use_cpu_ptcga && !mymm) { old_rr = ia64_get_rr(start); @@ -299,9 +299,9 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, done: itc2 = ia64_get_itc() - itc2; - __get_cpu_var(ptcstats).shub_itc_clocks += itc2; - if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) - __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2; + __this_cpu_add(ptcstats.shub_itc_clocks, itc2); + if (itc2 > __this_cpu_read(ptcstats.shub_itc_clocks_max)) + __this_cpu_write(ptcstats.shub_itc_clocks_max, itc2); if (old_rr) { ia64_set_rr(start, old_rr); @@ -311,7 +311,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, spin_unlock_irqrestore(PTC_LOCK(shub1), flags); if (flush_opt == 1 && deadlock) { - __get_cpu_var(ptcstats).deadlocks++; + __this_cpu_inc(ptcstats.deadlocks); sn2_ipi_flush_all_tlb(mm); } @@ -334,7 +334,7 @@ sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, short nasid, i; unsigned long *piows, zeroval, n; - __get_cpu_var(ptcstats).deadlocks++; + __this_cpu_inc(ptcstats.deadlocks); piows = (unsigned long *) pda->pio_write_status_addr; zeroval = pda->pio_write_status_val; @@ -349,7 +349,7 @@ sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, ptc1 = CHANGE_NASID(nasid, 
ptc1); n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); - __get_cpu_var(ptcstats).deadlocks2 += n; + __this_cpu_add(ptcstats.deadlocks2, n); } } -- GitLab From 2999a4b354c24985268f9310bc9522ff358453a8 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:48 -0500 Subject: [PATCH 0274/1868] alpha: Replace __get_cpu_var __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. 
DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) CC: Ivan Kokshaysky Cc: Matt Turner Acked-by: Richard Henderson Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/alpha/kernel/perf_event.c | 16 ++++++++-------- arch/alpha/kernel/time.c | 6 +++--- arch/powerpc/include/asm/cputime.h | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index c52e7f0ee5f6..5c218aa3f3df 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -431,7 +431,7 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) */ static int alpha_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int n0; int ret; @@ -483,7 +483,7 @@ static int alpha_pmu_add(struct perf_event *event, int flags) */ static void alpha_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; unsigned long irq_flags; 
int j; @@ -531,7 +531,7 @@ static void alpha_pmu_read(struct perf_event *event) static void alpha_pmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!(hwc->state & PERF_HES_STOPPED)) { cpuc->idx_mask &= ~(1UL<<hwc->idx); @@ -551,7 +551,7 @@ static void alpha_pmu_stop(struct perf_event *event, int flags) static void alpha_pmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) return; @@ -724,7 +724,7 @@ static int alpha_pmu_event_init(struct perf_event *event) */ static void alpha_pmu_enable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->enabled) return; @@ -750,7 +750,7 @@ static void alpha_pmu_enable(struct pmu *pmu) static void alpha_pmu_disable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!cpuc->enabled) return; @@ -814,8 +814,8 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, struct hw_perf_event *hwc; int idx, j; - __get_cpu_var(irq_pmi_count)++; - cpuc = &__get_cpu_var(cpu_hw_events); + __this_cpu_inc(irq_pmi_count); + cpuc = this_cpu_ptr(&cpu_hw_events); /* Completely counting through the PMC's period to trigger a new PMC * overflow interrupt while in this interrupt routine is utterly diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index ee39cee8064c..643a9dcdf093 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -56,9 +56,9 @@ unsigned long est_cycle_freq; DEFINE_PER_CPU(u8, irq_work_pending); -#define set_irq_work_pending_flag() 
__get_cpu_var(irq_work_pending) = 1 -#define test_irq_work_pending() __get_cpu_var(irq_work_pending) -#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 +#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1) +#define test_irq_work_pending() __this_cpu_read(irq_work_pending) +#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) void arch_irq_work_raise(void) { diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 607559ab271f..e5d10ab8463b 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -56,10 +56,10 @@ static inline unsigned long cputime_to_jiffies(const cputime_t ct) static inline cputime_t cputime_to_scaled(const cputime_t ct) { if (cpu_has_feature(CPU_FTR_SPURR) && - __get_cpu_var(cputime_last_delta)) + __this_cpu_read(cputime_last_delta)) return (__force u64) ct * - __get_cpu_var(cputime_scaled_last_delta) / - __get_cpu_var(cputime_last_delta); + __this_cpu_read(cputime_scaled_last_delta) / + __this_cpu_read(cputime_last_delta); return ct; } -- GitLab From 5828f666c069af74e00db21559f1535103c9f79a Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:49 -0500 Subject: [PATCH 0275/1868] powerpc: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. 
However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and fewer registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non-x86 arches as well in order to optimize per cpu access by e.g. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processor's instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) tj: Folded a fix patch. 
http://lkml.kernel.org/g/alpine.DEB.2.11.1408172143020.9652@gentwo.org Cc: Benjamin Herrenschmidt CC: Paul Mackerras Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/powerpc/include/asm/hardirq.h | 4 +++- arch/powerpc/include/asm/tlbflush.h | 4 ++-- arch/powerpc/include/asm/xics.h | 8 +++---- arch/powerpc/kernel/dbell.c | 2 +- arch/powerpc/kernel/hw_breakpoint.c | 6 ++--- arch/powerpc/kernel/iommu.c | 2 +- arch/powerpc/kernel/irq.c | 4 ++-- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/kprobes.c | 6 ++--- arch/powerpc/kernel/mce.c | 24 +++++++++---------- arch/powerpc/kernel/process.c | 10 ++++---- arch/powerpc/kernel/smp.c | 6 ++--- arch/powerpc/kernel/sysfs.c | 4 ++-- arch/powerpc/kernel/time.c | 22 ++++++++--------- arch/powerpc/kernel/traps.c | 8 +++---- arch/powerpc/kvm/e500.c | 14 +++++------ arch/powerpc/kvm/e500mc.c | 4 ++-- arch/powerpc/mm/hash_native_64.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/hugetlbpage-book3e.c | 6 ++--- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/perf/core-book3s.c | 22 ++++++++--------- arch/powerpc/perf/core-fsl-emb.c | 6 ++--- arch/powerpc/platforms/cell/interrupt.c | 6 ++--- .../platforms/powernv/opal-tracepoints.c | 4 ++-- arch/powerpc/platforms/ps3/interrupt.c | 2 +- arch/powerpc/platforms/pseries/dtl.c | 2 +- arch/powerpc/platforms/pseries/hvCall_inst.c | 4 ++-- arch/powerpc/platforms/pseries/iommu.c | 8 +++---- arch/powerpc/platforms/pseries/lpar.c | 6 ++--- arch/powerpc/platforms/pseries/ras.c | 4 ++-- arch/powerpc/sysdev/xics/xics-common.c | 2 +- 32 files changed, 105 insertions(+), 103 deletions(-) diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 1bbb3013d6aa..8d907ba4fd05 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -21,7 +21,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define __ARCH_IRQ_STAT -#define local_softirq_pending() 
__get_cpu_var(irq_stat).__softirq_pending +#define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending) +#define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x)) static inline void ack_bad_irq(unsigned int irq) { diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 2def01ed0cb2..cd7c2719d3ef 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -107,14 +107,14 @@ extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); static inline void arch_enter_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } static inline void arch_leave_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->index) __flush_tlb_pending(batch); diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 282d43a0c855..5007ad0448ce 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -97,7 +97,7 @@ DECLARE_PER_CPU(struct xics_cppr, xics_cppr); static inline void xics_push_cppr(unsigned int vec) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) return; @@ -110,7 +110,7 @@ static inline void xics_push_cppr(unsigned int vec) static inline unsigned char xics_pop_cppr(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); if (WARN_ON(os_cppr->index < 1)) return LOWEST_PRIORITY; @@ -120,7 +120,7 @@ static inline unsigned char xics_pop_cppr(void) static inline void xics_set_base_cppr(unsigned char cppr) { - struct xics_cppr *os_cppr 
= &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); /* we only really want to set the priority when there's * just one cppr value on the stack @@ -132,7 +132,7 @@ static inline void xics_set_base_cppr(unsigned char cppr) static inline unsigned char xics_cppr_top(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); return os_cppr->stack[os_cppr->index]; } diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index d55c76c571f3..f4217819cc31 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -41,7 +41,7 @@ void doorbell_exception(struct pt_regs *regs) may_hard_irq_enable(); - __get_cpu_var(irq_stat).doorbell_irqs++; + __this_cpu_inc(irq_stat.doorbell_irqs); smp_ipi_demux(); diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 0bb5918faaaf..b62f90eaf19e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -63,7 +63,7 @@ int hw_breakpoint_slots(int type) int arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot = &__get_cpu_var(bp_per_reg); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg); *slot = bp; @@ -88,7 +88,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) */ void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg); + struct perf_event **slot = this_cpu_ptr(&bp_per_reg); if (*slot != bp) { WARN_ONCE(1, "Can't find the breakpoint"); @@ -226,7 +226,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) */ rcu_read_lock(); - bp = __get_cpu_var(bp_per_reg); + bp = __this_cpu_read(bp_per_reg); if (!bp) goto out; info = counter_arch_bp(bp); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index a10642a0d861..71e60bfb89e2 100644 --- a/arch/powerpc/kernel/iommu.c +++ 
b/arch/powerpc/kernel/iommu.c @@ -208,7 +208,7 @@ static unsigned long iommu_range_alloc(struct device *dev, * We don't need to disable preemption here because any CPU can * safely use any IOMMU pool. */ - pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1); + pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1); if (largealloc) pool = &(tbl->large_pool); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4c5891de162e..74d40c6855b8 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -114,7 +114,7 @@ static inline notrace void set_soft_enabled(unsigned long enable) static inline notrace int decrementer_check_overflow(void) { u64 now = get_tb_or_rtc(); - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); return now >= *next_tb; } @@ -499,7 +499,7 @@ void __do_irq(struct pt_regs *regs) /* And finally process it */ if (unlikely(irq == NO_IRQ)) - __get_cpu_var(irq_stat).spurious_irqs++; + __this_cpu_inc(irq_stat.spurious_irqs); else generic_handle_irq(irq); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 8504657379f1..e77c3ccf8dcf 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -155,7 +155,7 @@ static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; struct thread_info *backup_current_thread_info = - &__get_cpu_var(kgdb_thread_info); + this_cpu_ptr(&kgdb_thread_info); if (user_mode(regs)) return 0; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2f72af82513c..7c053f281406 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -119,7 +119,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, 
kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; } @@ -127,7 +127,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_msr = regs->msr; } @@ -192,7 +192,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ret = 1; goto no_kprobe; } - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index a7fd4cb78b78..15c99b649b04 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -73,8 +73,8 @@ void save_mce_event(struct pt_regs *regs, long handled, uint64_t nip, uint64_t addr) { uint64_t srr1; - int index = __get_cpu_var(mce_nest_count)++; - struct machine_check_event *mce = &__get_cpu_var(mce_event[index]); + int index = __this_cpu_inc_return(mce_nest_count); + struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); /* * Return if we don't have enough space to log mce event. @@ -143,7 +143,7 @@ void save_mce_event(struct pt_regs *regs, long handled, */ int get_mce_event(struct machine_check_event *mce, bool release) { - int index = __get_cpu_var(mce_nest_count) - 1; + int index = __this_cpu_read(mce_nest_count) - 1; struct machine_check_event *mc_evt; int ret = 0; @@ -153,7 +153,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) /* Check if we have MCE info to process. 
*/ if (index < MAX_MC_EVT) { - mc_evt = &__get_cpu_var(mce_event[index]); + mc_evt = this_cpu_ptr(&mce_event[index]); /* Copy the event structure and release the original */ if (mce) *mce = *mc_evt; @@ -163,7 +163,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) } /* Decrement the count to free the slot. */ if (release) - __get_cpu_var(mce_nest_count)--; + __this_cpu_dec(mce_nest_count); return ret; } @@ -184,13 +184,13 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __get_cpu_var(mce_queue_count)++; + index = __this_cpu_inc_return(mce_queue_count); /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __get_cpu_var(mce_queue_count)--; + __this_cpu_dec(mce_queue_count); return; } - __get_cpu_var(mce_event_queue[index]) = evt; + memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); /* Queue irq work to process this event later. */ irq_work_queue(&mce_event_process_work); @@ -208,11 +208,11 @@ static void machine_check_process_queued_event(struct irq_work *work) * For now just print it to console. * TODO: log this error event to FSP or nvram. 
*/ - while (__get_cpu_var(mce_queue_count) > 0) { - index = __get_cpu_var(mce_queue_count) - 1; + while (__this_cpu_read(mce_queue_count) > 0) { + index = __this_cpu_read(mce_queue_count) - 1; machine_check_print_event_info( - &__get_cpu_var(mce_event_queue[index])); - __get_cpu_var(mce_queue_count)--; + this_cpu_ptr(&mce_event_queue[index])); + __this_cpu_dec(mce_queue_count); } } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bf44ae962ab8..2df2f2956520 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -498,7 +498,7 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk) void __set_breakpoint(struct arch_hw_breakpoint *brk) { - __get_cpu_var(current_brk) = *brk; + __this_cpu_write(current_brk, *brk); if (cpu_has_feature(CPU_FTR_DAWR)) set_dawr(brk); @@ -841,7 +841,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) + if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk), &new->thread.hw_brk))) __set_breakpoint(&new->thread.hw_brk); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif @@ -855,7 +855,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * Collect processor utilization data per process */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); long unsigned start_tb, current_tb; start_tb = old_thread->start_tb; cu->current_tb = current_tb = mfspr(SPRN_PURR); @@ -865,7 +865,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC_BOOK3S_64 - batch = &__get_cpu_var(ppc64_tlb_batch); + batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->active) { current_thread_info()->local_flags |= _TLF_LAZY_MMU; if (batch->index) @@ -888,7 +888,7 @@ struct task_struct *__switch_to(struct task_struct *prev, 
#ifdef CONFIG_PPC_BOOK3S_64 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; - batch = &__get_cpu_var(ppc64_tlb_batch); + batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a0738af4aba6..60391a51467a 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -242,7 +242,7 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) irqreturn_t smp_ipi_demux(void) { - struct cpu_messages *info = &__get_cpu_var(ipi_message); + struct cpu_messages *info = this_cpu_ptr(&ipi_message); unsigned int all; mb(); /* order any irq clear */ @@ -438,9 +438,9 @@ void generic_mach_cpu_die(void) idle_task_exit(); cpu = smp_processor_id(); printk(KERN_DEBUG "CPU%d offline\n", cpu); - __get_cpu_var(cpu_state) = CPU_DEAD; + __this_cpu_write(cpu_state, CPU_DEAD); smp_wmb(); - while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + while (__this_cpu_read(cpu_state) != CPU_UP_PREPARE) cpu_relax(); } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 67fd2fd2620a..fa1fd8a0c867 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -394,10 +394,10 @@ void ppc_enable_pmcs(void) ppc_set_pmu_inuse(1); /* Only need to enable them once */ - if (__get_cpu_var(pmcs_enabled)) + if (__this_cpu_read(pmcs_enabled)) return; - __get_cpu_var(pmcs_enabled) = 1; + __this_cpu_write(pmcs_enabled, 1); if (ppc_md.enable_pmcs) ppc_md.enable_pmcs(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 368ab374d33c..4769e5b7f905 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -458,9 +458,9 @@ static inline void clear_irq_work_pending(void) DEFINE_PER_CPU(u8, irq_work_pending); -#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 -#define test_irq_work_pending() __get_cpu_var(irq_work_pending) -#define 
clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 +#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1) +#define test_irq_work_pending() __this_cpu_read(irq_work_pending) +#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) #endif /* 32 vs 64 bit */ @@ -482,8 +482,8 @@ void arch_irq_work_raise(void) void __timer_interrupt(void) { struct pt_regs *regs = get_irq_regs(); - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); - struct clock_event_device *evt = &__get_cpu_var(decrementers); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + struct clock_event_device *evt = this_cpu_ptr(&decrementers); u64 now; trace_timer_interrupt_entry(regs); @@ -498,7 +498,7 @@ void __timer_interrupt(void) *next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); - __get_cpu_var(irq_stat).timer_irqs_event++; + __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; if (now <= DECREMENTER_MAX) @@ -506,13 +506,13 @@ void __timer_interrupt(void) /* We may have raced with new irq work */ if (test_irq_work_pending()) set_dec(1); - __get_cpu_var(irq_stat).timer_irqs_others++; + __this_cpu_inc(irq_stat.timer_irqs_others); } #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); + struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } #endif @@ -527,7 +527,7 @@ void __timer_interrupt(void) void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. 
@@ -813,7 +813,7 @@ static void __init clocksource_init(void) static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; + __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt); set_dec(evt); /* We may have raced with new irq work */ @@ -833,7 +833,7 @@ static void decrementer_set_mode(enum clock_event_mode mode, /* Interrupt handler for the timer broadcast IPI */ void tick_broadcast_ipi_handler(void) { - u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); *next_tb = get_tb_or_rtc(); __timer_interrupt(); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 0dc43f9932cf..e6595b72269b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -295,7 +295,7 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; - __get_cpu_var(irq_stat).mce_exceptions++; + __this_cpu_inc(irq_stat.mce_exceptions); if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); @@ -304,7 +304,7 @@ long machine_check_early(struct pt_regs *regs) long hmi_exception_realmode(struct pt_regs *regs) { - __get_cpu_var(irq_stat).hmi_exceptions++; + __this_cpu_inc(irq_stat.hmi_exceptions); if (ppc_md.hmi_exception_early) ppc_md.hmi_exception_early(regs); @@ -700,7 +700,7 @@ void machine_check_exception(struct pt_regs *regs) enum ctx_state prev_state = exception_enter(); int recover = 0; - __get_cpu_var(irq_stat).mce_exceptions++; + __this_cpu_inc(irq_stat.mce_exceptions); /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. 
one if the CPU @@ -1519,7 +1519,7 @@ void vsx_unavailable_tm(struct pt_regs *regs) void performance_monitor_exception(struct pt_regs *regs) { - __get_cpu_var(irq_stat).pmu_irqs++; + __this_cpu_inc(irq_stat.pmu_irqs); perf_irq(regs); } diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 2e02ed849f36..16095841afe1 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -76,11 +76,11 @@ static inline int local_sid_setup_one(struct id *entry) unsigned long sid; int ret = -1; - sid = ++(__get_cpu_var(pcpu_last_used_sid)); + sid = __this_cpu_inc_return(pcpu_last_used_sid); if (sid < NUM_TIDS) { - __get_cpu_var(pcpu_sids).entry[sid] = entry; + __this_cpu_write(pcpu_sids)entry[sid], entry); entry->val = sid; - entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; + entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]); ret = sid; } @@ -108,8 +108,8 @@ static inline int local_sid_setup_one(struct id *entry) static inline int local_sid_lookup(struct id *entry) { if (entry && entry->val != 0 && - __get_cpu_var(pcpu_sids).entry[entry->val] == entry && - entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) + __this_cpu_read(pcpu_sids.entry[entry->val]) == entry && + entry->pentry == this_cpu_ptr(&pcpu_sids.entry[entry->val])) return entry->val; return -1; } @@ -117,8 +117,8 @@ static inline int local_sid_lookup(struct id *entry) /* Invalidate all id mappings on local core -- call with preempt disabled */ static inline void local_sid_destroy_all(void) { - __get_cpu_var(pcpu_last_used_sid) = 0; - memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); + __this_cpu_write(pcpu_last_used_sid, 0); + memset(this_cpu_ptr(&pcpu_sids), 0, sizeof(pcpu_sids)); } static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 164bad2a19bf..6ef54e523f33 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -141,9 +141,9 @@ static void 
kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { + __this_cpu_read(last_vcpu_of_lpid[vcpu->kvm->arch.lpid]) != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; + __this_cpu_write(last_vcpu_of_lpid[vcpu->kvm->arch.lpid], vcpu); } kvmppc_load_guest_fp(vcpu); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index afc0a8295f84..504a16f1a1a0 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -625,7 +625,7 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long want_v; unsigned long flags; real_pte_t pte; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); unsigned long psize = batch->psize; int ssize = batch->ssize; int i; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index daee7f4e5a14..060d51fda35e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1314,7 +1314,7 @@ void flush_hash_range(unsigned long number, int local) else { int i; struct ppc64_tlb_batch *batch = - &__get_cpu_var(ppc64_tlb_batch); + this_cpu_ptr(&ppc64_tlb_batch); for (i = 0; i < number; i++) flush_hash_page(batch->vpn[i], batch->pte[i], diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 5e4ee2573903..ba47aaf33a4b 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -33,13 +33,13 @@ static inline int tlb1_next(void) ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - index = __get_cpu_var(next_tlbcam_idx); + index = this_cpu_read(next_tlbcam_idx); /* Just round-robin the entries and wrap when we hit the end */ if (unlikely(index == ncams - 1)) - 
__get_cpu_var(next_tlbcam_idx) = tlbcam_index; + __this_cpu_write(next_tlbcam_idx, tlbcam_index); else - __get_cpu_var(next_tlbcam_idx)++; + __this_cpu_inc(next_tlbcam_idx); return index; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7e70ae968e5f..8aa04f03fd31 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -462,7 +462,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) { struct hugepd_freelist **batchp; - batchp = &get_cpu_var(hugepd_freelist_cur); + batchp = this_cpu_ptr(&hugepd_freelist_cur); if (atomic_read(&tlb->mm->mm_users) < 2 || cpumask_equal(mm_cpumask(tlb->mm), diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b7cd00b0171e..690f9c7bf3c8 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -339,7 +339,7 @@ static void power_pmu_bhrb_reset(void) static void power_pmu_bhrb_enable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -354,7 +354,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event) static void power_pmu_bhrb_disable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -1144,7 +1144,7 @@ static void power_pmu_disable(struct pmu *pmu) if (!ppmu) return; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) { /* @@ -1211,7 +1211,7 @@ static void power_pmu_enable(struct pmu *pmu) return; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -1403,7 +1403,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) * Add the event to the list (if there is room) * 
and check whether the total set is still feasible. */ - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); n0 = cpuhw->n_events; if (n0 >= ppmu->n_counter) goto out; @@ -1469,7 +1469,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) power_pmu_read(event); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); for (i = 0; i < cpuhw->n_events; ++i) { if (event == cpuhw->event[i]) { while (++i < cpuhw->n_events) { @@ -1575,7 +1575,7 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) */ void power_pmu_start_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; @@ -1589,7 +1589,7 @@ void power_pmu_start_txn(struct pmu *pmu) */ void power_pmu_cancel_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1607,7 +1607,7 @@ int power_pmu_commit_txn(struct pmu *pmu) if (!ppmu) return -EAGAIN; - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; @@ -1964,7 +1964,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { struct cpu_hw_events *cpuhw; - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); power_pmu_bhrb_read(cpuhw); data.br_stack = &cpuhw->bhrb_stack; } @@ -2037,7 +2037,7 @@ static bool pmc_overflow(unsigned long val) static void perf_event_interrupt(struct pt_regs *regs) { int i, j; - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; unsigned 
long val[8]; int found, active; diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index d35ae52c69dc..4acaea01fe03 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -210,7 +210,7 @@ static void fsl_emb_pmu_disable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) { cpuhw->disabled = 1; @@ -249,7 +249,7 @@ static void fsl_emb_pmu_enable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -653,7 +653,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, static void perf_event_interrupt(struct pt_regs *regs) { int i; - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; unsigned long val; int found = 0; diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 8a106b4172e0..4c11421847be 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -82,7 +82,7 @@ static void iic_unmask(struct irq_data *d) static void iic_eoi(struct irq_data *d) { - struct iic *iic = &__get_cpu_var(cpu_iic); + struct iic *iic = this_cpu_ptr(&cpu_iic); out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]); BUG_ON(iic->eoi_ptr < 0); } @@ -148,7 +148,7 @@ static unsigned int iic_get_irq(void) struct iic *iic; unsigned int virq; - iic = &__get_cpu_var(cpu_iic); + iic = this_cpu_ptr(&cpu_iic); *(unsigned long *) &pending = in_be64((u64 __iomem *) &iic->regs->pending_destr); if (!(pending.flags & CBE_IIC_IRQ_VALID)) @@ -163,7 +163,7 @@ static unsigned int iic_get_irq(void) void iic_setup_cpu(void) { - out_be64(&__get_cpu_var(cpu_iic).regs->prio, 0xff); + 
out_be64(this_cpu_ptr(&cpu_iic.regs->prio), 0xff); } u8 iic_get_target_id(int cpu) diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index d8a000a9988b..9527e2a7c541 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -48,7 +48,7 @@ void __trace_opal_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = &__get_cpu_var(opal_trace_depth); + depth = this_cpu_ptr(&opal_trace_depth); if (*depth) goto out; @@ -69,7 +69,7 @@ void __trace_opal_exit(long opcode, unsigned long retval) local_irq_save(flags); - depth = &__get_cpu_var(opal_trace_depth); + depth = this_cpu_ptr(&opal_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 5f3b23220b8e..a6c42f34303a 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -711,7 +711,7 @@ void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq) static unsigned int ps3_get_irq(void) { - struct ps3_private *pd = &__get_cpu_var(ps3_private); + struct ps3_private *pd = this_cpu_ptr(&ps3_private); u64 x = (pd->bmp.status & pd->bmp.mask); unsigned int plug; diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 1062f71f5a85..39049e4884fb 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -75,7 +75,7 @@ static atomic_t dtl_count; */ static void consume_dtle(struct dtl_entry *dtle, u64 index) { - struct dtl_ring *dtlr = &__get_cpu_var(dtl_rings); + struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings); struct dtl_entry *wp = dtlr->write_ptr; struct lppaca *vpa = local_paca->lppaca_ptr; diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 4575f0c9e521..f02ec3ab428c 100644 --- 
a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -110,7 +110,7 @@ static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h = this_cpu_ptr(&hcall_stats[opcode / 4]); h->tb_start = mftb(); h->purr_start = mfspr(SPRN_PURR); } @@ -123,7 +123,7 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h = this_cpu_ptr(&hcall_stats[opcode / 4]); h->num_calls++; h->tb_total += mftb() - h->tb_start; h->purr_total += mfspr(SPRN_PURR) - h->purr_start; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 4642d6a4d356..8c355ed4291e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -200,7 +200,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, local_irq_save(flags); /* to protect tcep and the page behind it */ - tcep = __get_cpu_var(tce_page); + tcep = __this_cpu_read(tce_page); /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() @@ -213,7 +213,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } - __get_cpu_var(tce_page) = tcep; + __this_cpu_write(tce_page, tcep); } rpn = __pa(uaddr) >> TCE_SHIFT; @@ -399,7 +399,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, long l, limit; local_irq_disable(); /* to protect tcep and the page behind it */ - tcep = __get_cpu_var(tce_page); + tcep = __this_cpu_read(tce_page); if (!tcep) { tcep = (__be64 *)__get_free_page(GFP_ATOMIC); @@ -407,7 +407,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, local_irq_enable(); return -ENOMEM; } - __get_cpu_var(tce_page) = tcep; + 
__this_cpu_write(tce_page, tcep); } proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 34e64237fff9..56df72da59fe 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -507,7 +507,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) unsigned long vpn; unsigned long i, pix, rc; unsigned long flags = 0; - struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[9]; unsigned long hash, index, shift, hidx, slot; @@ -697,7 +697,7 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = &__get_cpu_var(hcall_trace_depth); + depth = this_cpu_ptr(&hcall_trace_depth); if (*depth) goto out; @@ -722,7 +722,7 @@ void __trace_hcall_exit(long opcode, unsigned long retval, local_irq_save(flags); - depth = &__get_cpu_var(hcall_trace_depth); + depth = this_cpu_ptr(&hcall_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index dff05b9eb946..179a69fd5568 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -302,8 +302,8 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; if (!rtas_error_extended(h)) { - memcpy(&__get_cpu_var(mce_data_buf), h, sizeof(__u64)); - errhdr = (struct rtas_error_log *)&__get_cpu_var(mce_data_buf); + memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64)); + errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf); } else { int len, error_log_length; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 
fe0cca477164..365249cd346b 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -155,7 +155,7 @@ int __init xics_smp_probe(void) void xics_teardown_cpu(void) { - struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); + struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); /* * we have to reset the cppr index to 0 because we're -- GitLab From b4f501916ce2ae80c28017814d71d1bf83679271 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:50 -0500 Subject: [PATCH 0276/1868] tile: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. 
Once these operations are used throughout then specialized macros can be defined in non-x86 arches as well in order to optimize per cpu access by, e.g., using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processor's instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. 
Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Acked-by: Chris Metcalf Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/tile/include/asm/irqflags.h | 4 ++-- arch/tile/include/asm/mmu_context.h | 6 +++--- arch/tile/kernel/irq.c | 14 +++++++------- arch/tile/kernel/messaging.c | 4 ++-- arch/tile/kernel/process.c | 2 +- arch/tile/kernel/setup.c | 3 ++- arch/tile/kernel/single_step.c | 4 ++-- arch/tile/kernel/smp.c | 2 +- arch/tile/kernel/smpboot.c | 6 +++--- arch/tile/kernel/time.c | 8 ++++---- arch/tile/mm/highmem.c | 2 +- arch/tile/mm/init.c | 4 ++-- 12 files changed, 30 insertions(+), 29 deletions(-) diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h index 71af5747874d..60d62a292fce 100644 --- a/arch/tile/include/asm/irqflags.h +++ b/arch/tile/include/asm/irqflags.h @@ -140,12 +140,12 @@ extern unsigned int debug_smp_processor_id(void); /* * Read the set of maskable interrupts. - * We avoid the preemption warning here via __this_cpu_ptr since even + * We avoid the preemption warning here via raw_cpu_ptr since even * if irqs are already enabled, it's harmless to read the wrong cpu's * enabled mask. */ #define arch_local_irqs_enabled() \ - (*__this_cpu_ptr(&interrupts_enabled_mask)) + (*raw_cpu_ptr(&interrupts_enabled_mask)) /* Re-enable all maskable interrupts. */ #define arch_local_irq_enable() \ diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h index 4734215e2ad4..f67753db1f78 100644 --- a/arch/tile/include/asm/mmu_context.h +++ b/arch/tile/include/asm/mmu_context.h @@ -84,7 +84,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *t) * clear any pending DMA interrupts. 
*/ if (current->thread.tile_dma_state.enabled) - install_page_table(mm->pgd, __get_cpu_var(current_asid)); + install_page_table(mm->pgd, __this_cpu_read(current_asid)); #endif } @@ -96,12 +96,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, int cpu = smp_processor_id(); /* Pick new ASID. */ - int asid = __get_cpu_var(current_asid) + 1; + int asid = __this_cpu_read(current_asid) + 1; if (asid > max_asid) { asid = min_asid; local_flush_tlb(); } - __get_cpu_var(current_asid) = asid; + __this_cpu_write(current_asid, asid); /* Clear cpu from the old mm, and set it in the new one. */ cpumask_clear_cpu(cpu, mm_cpumask(prev)); diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 637f2ffaa5f5..ba85765e1436 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -73,7 +73,7 @@ static DEFINE_PER_CPU(int, irq_depth); */ void tile_dev_intr(struct pt_regs *regs, int intnum) { - int depth = __get_cpu_var(irq_depth)++; + int depth = __this_cpu_inc_return(irq_depth); unsigned long original_irqs; unsigned long remaining_irqs; struct pt_regs *old_regs; @@ -120,7 +120,7 @@ void tile_dev_intr(struct pt_regs *regs, int intnum) /* Count device irqs; Linux IPIs are counted elsewhere. */ if (irq != IRQ_RESCHEDULE) - __get_cpu_var(irq_stat).irq_dev_intr_count++; + __this_cpu_inc(irq_stat.irq_dev_intr_count); generic_handle_irq(irq); } @@ -130,10 +130,10 @@ void tile_dev_intr(struct pt_regs *regs, int intnum) * including any that were reenabled during interrupt * handling. 
*/ - if (depth == 0) - unmask_irqs(~__get_cpu_var(irq_disable_mask)); + if (depth == 1) + unmask_irqs(~__this_cpu_read(irq_disable_mask)); - __get_cpu_var(irq_depth)--; + __this_cpu_dec(irq_depth); /* * Track time spent against the current process again and @@ -151,7 +151,7 @@ void tile_dev_intr(struct pt_regs *regs, int intnum) static void tile_irq_chip_enable(struct irq_data *d) { get_cpu_var(irq_disable_mask) &= ~(1UL << d->irq); - if (__get_cpu_var(irq_depth) == 0) + if (__this_cpu_read(irq_depth) == 0) unmask_irqs(1UL << d->irq); put_cpu_var(irq_disable_mask); } @@ -197,7 +197,7 @@ static void tile_irq_chip_ack(struct irq_data *d) */ static void tile_irq_chip_eoi(struct irq_data *d) { - if (!(__get_cpu_var(irq_disable_mask) & (1UL << d->irq))) + if (!(__this_cpu_read(irq_disable_mask) & (1UL << d->irq))) unmask_irqs(1UL << d->irq); } diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c index 7867266f9716..ac950be1318e 100644 --- a/arch/tile/kernel/messaging.c +++ b/arch/tile/kernel/messaging.c @@ -28,7 +28,7 @@ static DEFINE_PER_CPU(HV_MsgState, msg_state); void init_messaging(void) { /* Allocate storage for messages in kernel space */ - HV_MsgState *state = &__get_cpu_var(msg_state); + HV_MsgState *state = this_cpu_ptr(&msg_state); int rc = hv_register_message_state(state); if (rc != HV_OK) panic("hv_register_message_state: error %d", rc); @@ -96,7 +96,7 @@ void hv_message_intr(struct pt_regs *regs, int intnum) struct hv_driver_cb *cb = (struct hv_driver_cb *)him->intarg; cb->callback(cb, him->intdata); - __get_cpu_var(irq_stat).irq_hv_msg_count++; + __this_cpu_inc(irq_stat.irq_hv_msg_count); } } diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 16ed58948757..0050cbc1d9de 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -64,7 +64,7 @@ early_param("idle", idle_setup); void arch_cpu_idle(void) { - __get_cpu_var(irq_stat).idle_timestamp = jiffies; + __this_cpu_write(irq_stat.idle_timestamp, 
jiffies); _cpu_idle(); } diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 112ababa9e55..b9736ded06f2 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1218,7 +1218,8 @@ static void __init validate_hv(void) * various asid variables to their appropriate initial states. */ asid_range = hv_inquire_asid(0); - __get_cpu_var(current_asid) = min_asid = asid_range.start; + min_asid = asid_range.start; + __this_cpu_write(current_asid, min_asid); max_asid = asid_range.start + asid_range.size - 1; if (hv_confstr(HV_CONFSTR_CHIP_MODEL, (HV_VirtAddr)chip_model, diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index de07fa7d1315..6cb2ce31b5a2 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -740,7 +740,7 @@ static DEFINE_PER_CPU(unsigned long, ss_saved_pc); void gx_singlestep_handle(struct pt_regs *regs, int fault_num) { - unsigned long *ss_pc = &__get_cpu_var(ss_saved_pc); + unsigned long *ss_pc = this_cpu_ptr(&ss_saved_pc); struct thread_info *info = (void *)current_thread_info(); int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP); unsigned long control = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K); @@ -766,7 +766,7 @@ void gx_singlestep_handle(struct pt_regs *regs, int fault_num) void single_step_once(struct pt_regs *regs) { - unsigned long *ss_pc = &__get_cpu_var(ss_saved_pc); + unsigned long *ss_pc = this_cpu_ptr(&ss_saved_pc); unsigned long control = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K); *ss_pc = regs->pc; diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 01e8ab29f43a..3dbedb0174b3 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -188,7 +188,7 @@ void flush_icache_range(unsigned long start, unsigned long end) /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. 
*/ static irqreturn_t handle_reschedule_ipi(int irq, void *token) { - __get_cpu_var(irq_stat).irq_resched_count++; + __this_cpu_inc(irq_stat.irq_resched_count); scheduler_ipi(); return IRQ_HANDLED; diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 732e9d138661..0d59a1b60c74 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -41,7 +41,7 @@ void __init smp_prepare_boot_cpu(void) int cpu = smp_processor_id(); set_cpu_online(cpu, 1); set_cpu_present(cpu, 1); - __get_cpu_var(cpu_state) = CPU_ONLINE; + __this_cpu_write(cpu_state, CPU_ONLINE); init_messaging(); } @@ -158,7 +158,7 @@ static void start_secondary(void) /* printk(KERN_DEBUG "Initializing CPU#%d\n", cpuid); */ /* Initialize the current asid for our first page table. */ - __get_cpu_var(current_asid) = min_asid; + __this_cpu_write(current_asid, min_asid); /* Set up this thread as another owner of the init_mm */ atomic_inc(&init_mm.mm_count); @@ -201,7 +201,7 @@ void online_secondary(void) notify_cpu_starting(smp_processor_id()); set_cpu_online(smp_processor_id(), 1); - __get_cpu_var(cpu_state) = CPU_ONLINE; + __this_cpu_write(cpu_state, CPU_ONLINE); /* Set up tile-specific state for this cpu. */ setup_cpu(0); diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index d8fbc289e680..ab1c9fe2aa7f 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -162,7 +162,7 @@ static DEFINE_PER_CPU(struct clock_event_device, tile_timer) = { void setup_tile_timer(void) { - struct clock_event_device *evt = &__get_cpu_var(tile_timer); + struct clock_event_device *evt = this_cpu_ptr(&tile_timer); /* Fill in fields that are speed-specific. 
*/ clockevents_calc_mult_shift(evt, cycles_per_sec, TILE_MINSEC); @@ -182,7 +182,7 @@ void setup_tile_timer(void) void do_timer_interrupt(struct pt_regs *regs, int fault_num) { struct pt_regs *old_regs = set_irq_regs(regs); - struct clock_event_device *evt = &__get_cpu_var(tile_timer); + struct clock_event_device *evt = this_cpu_ptr(&tile_timer); /* * Mask the timer interrupt here, since we are a oneshot timer @@ -194,7 +194,7 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num) irq_enter(); /* Track interrupt count. */ - __get_cpu_var(irq_stat).irq_timer_count++; + __this_cpu_inc(irq_stat.irq_timer_count); /* Call the generic timer handler */ evt->event_handler(evt); @@ -235,7 +235,7 @@ cycles_t ns2cycles(unsigned long nsecs) * We do not have to disable preemption here as each core has the same * clock frequency. */ - struct clock_event_device *dev = &__raw_get_cpu_var(tile_timer); + struct clock_event_device *dev = raw_cpu_ptr(&tile_timer); /* * as in clocksource.h and x86's timer.h, we split the calculation diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index 0dc218294770..6aa2f2625447 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -103,7 +103,7 @@ static void kmap_atomic_register(struct page *page, int type, spin_lock(&_lock); /* With interrupts disabled, now fill in the per-cpu info. 
*/ - amp = &__get_cpu_var(amps).per_type[type]; + amp = this_cpu_ptr(&amps.per_type[type]); amp->page = page; amp->cpu = smp_processor_id(); amp->va = va; diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index bfb3127b4df9..f46a152b09e6 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -593,14 +593,14 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) interrupt_mask_set_mask(-1ULL); rc = flush_and_install_context(__pa(pgtables), init_pgprot((unsigned long)pgtables), - __get_cpu_var(current_asid), + __this_cpu_read(current_asid), cpumask_bits(my_cpu_mask)); interrupt_mask_restore_mask(irqmask); BUG_ON(rc != 0); /* Copy the page table back to the normal swapper_pg_dir. */ memcpy(pgd_base, pgtables, sizeof(pgtables)); - __install_page_table(pgd_base, __get_cpu_var(current_asid), + __install_page_table(pgd_base, __this_cpu_read(current_asid), swapper_pgprot); /* -- GitLab From 81829a96869c8bad74a582705617e75758c4152d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:51 -0500 Subject: [PATCH 0277/1868] tile: Use this_cpu_ptr() for hardware counters Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/tile/kernel/perf_event.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/tile/kernel/perf_event.c b/arch/tile/kernel/perf_event.c index 2bf6c9c135c1..bb509cee3b59 100644 --- a/arch/tile/kernel/perf_event.c +++ b/arch/tile/kernel/perf_event.c @@ -590,7 +590,7 @@ static int tile_event_set_period(struct perf_event *event) */ static void tile_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -616,7 +616,7 @@ static void tile_pmu_stop(struct perf_event *event, int flags) */ static void tile_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = 
&__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx = event->hw.idx; if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) @@ -650,7 +650,7 @@ static void tile_pmu_start(struct perf_event *event, int flags) */ static int tile_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc; unsigned long mask; int b, max_cnt; @@ -706,7 +706,7 @@ static int tile_pmu_add(struct perf_event *event, int flags) */ static void tile_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; /* @@ -880,14 +880,14 @@ static struct pmu tilera_pmu = { int tile_pmu_handle_irq(struct pt_regs *regs, int fault) { struct perf_sample_data data; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; struct hw_perf_event *hwc; u64 val; unsigned long status; int bit; - __get_cpu_var(perf_irqs)++; + __this_cpu_inc(perf_irqs); if (!atomic_read(&tile_active_events)) return 0; -- GitLab From 7e788ab11d73fbb617973c12a9b3f84f93721e67 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:52 -0500 Subject: [PATCH 0278/1868] blackfin: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. 
__get_cpu_var() is defined as: #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and fewer registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non-x86 arches as well in order to optimize per cpu access, e.g. by using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processor's instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. 
Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) CC: Mike Frysinger Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/blackfin/include/asm/ipipe.h | 2 +- arch/blackfin/kernel/perf_event.c | 10 +++++----- arch/blackfin/mach-common/ints-priority.c | 8 ++++---- arch/blackfin/mach-common/smp.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/blackfin/include/asm/ipipe.h b/arch/blackfin/include/asm/ipipe.h index 17b5e92e3bc6..fe1160fbff91 100644 --- a/arch/blackfin/include/asm/ipipe.h +++ b/arch/blackfin/include/asm/ipipe.h @@ -157,7 +157,7 @@ static inline unsigned long __ipipe_ffnz(unsigned long ul) } #define __ipipe_do_root_xirq(ipd, irq) \ - ((ipd)->irqs[irq].handler(irq, &__raw_get_cpu_var(__ipipe_tick_regs))) + ((ipd)->irqs[irq].handler(irq, raw_cpu_ptr(&__ipipe_tick_regs))) #define __ipipe_run_irqtail(irq) /* Must be a macro */ \ do { \ diff --git a/arch/blackfin/kernel/perf_event.c b/arch/blackfin/kernel/perf_event.c index ea2032013cc2..1e9c8b0bf486 100644 --- a/arch/blackfin/kernel/perf_event.c +++ b/arch/blackfin/kernel/perf_event.c @@ -300,7 +300,7 @@ static void bfin_perf_event_update(struct perf_event *event, static void bfin_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -318,7 +318,7 @@ static void bfin_pmu_stop(struct perf_event *event, int flags) static void bfin_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -335,7 +335,7 @@ static void bfin_pmu_start(struct perf_event *event, int flags) static void bfin_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events 
*cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); bfin_pmu_stop(event, PERF_EF_UPDATE); __clear_bit(event->hw.idx, cpuc->used_mask); @@ -345,7 +345,7 @@ static void bfin_pmu_del(struct perf_event *event, int flags) static int bfin_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; int ret = -EAGAIN; @@ -421,7 +421,7 @@ static int bfin_pmu_event_init(struct perf_event *event) static void bfin_pmu_enable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; struct hw_perf_event *hwc; int i; diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 1f94784eab6d..e5910e9bc4ac 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -1309,12 +1309,12 @@ asmlinkage int __ipipe_grab_irq(int vec, struct pt_regs *regs) bfin_write_TIMER_STATUS(1); /* Latch TIMIL0 */ #endif /* This is basically what we need from the register frame. 
*/ - __raw_get_cpu_var(__ipipe_tick_regs).ipend = regs->ipend; - __raw_get_cpu_var(__ipipe_tick_regs).pc = regs->pc; + __this_cpu_write(__ipipe_tick_regs.ipend, regs->ipend); + __this_cpu_write(__ipipe_tick_regs.pc, regs->pc); if (this_domain != ipipe_root_domain) - __raw_get_cpu_var(__ipipe_tick_regs).ipend &= ~0x10; + __this_cpu_and(__ipipe_tick_regs.ipend, ~0x10); else - __raw_get_cpu_var(__ipipe_tick_regs).ipend |= 0x10; + __this_cpu_or(__ipipe_tick_regs.ipend, 0x10); } /* diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index ba6c30d8534d..8ad3e90cc8fc 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -146,7 +146,7 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance) platform_clear_ipi(cpu, IRQ_SUPPLE_1); smp_rmb(); - bfin_ipi_data = &__get_cpu_var(bfin_ipi); + bfin_ipi_data = this_cpu_ptr(&bfin_ipi); while ((pending = atomic_xchg(&bfin_ipi_data->bits, 0)) != 0) { msg = 0; do { -- GitLab From 8c23af6122e56ad30e3de259731c31a998ff5b11 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:53 -0500 Subject: [PATCH 0279/1868] avr32: Replace __get_cpu_var with __this_cpu_write Replace the single use of __get_cpu_var in avr32 with __this_cpu_write. 
Cc: Haavard Skinnemoen Acked-by: Hans-Christian Egtvedt Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/avr32/kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/avr32/kernel/kprobes.c b/arch/avr32/kernel/kprobes.c index f820e9f25520..a94ece4a72c8 100644 --- a/arch/avr32/kernel/kprobes.c +++ b/arch/avr32/kernel/kprobes.c @@ -104,7 +104,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) static void __kprobes set_current_kprobe(struct kprobe *p) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); } static int __kprobes kprobe_handler(struct pt_regs *regs) -- GitLab From 494fc42170bf0747ac28e12ef13a7d388d5ff2c7 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:54 -0500 Subject: [PATCH 0280/1868] sparc: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. 
Thereby address calculations are avoided and fewer registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non-x86 arches as well in order to optimize per cpu access, e.g. by using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processor's instance of a per cpu variable. DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Cc: sparclinux@vger.kernel.org Acked-by: David S. 
Miller Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/sparc/include/asm/cpudata_32.h | 2 +- arch/sparc/include/asm/cpudata_64.h | 2 +- arch/sparc/kernel/kprobes.c | 6 +++--- arch/sparc/kernel/leon_smp.c | 2 +- arch/sparc/kernel/nmi.c | 16 ++++++++-------- arch/sparc/kernel/pci_sun4v.c | 8 ++++---- arch/sparc/kernel/perf_event.c | 26 +++++++++++++------------- arch/sparc/kernel/sun4d_smp.c | 2 +- arch/sparc/kernel/time_64.c | 2 +- arch/sparc/mm/tlb.c | 4 ++-- 10 files changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/sparc/include/asm/cpudata_32.h b/arch/sparc/include/asm/cpudata_32.h index 0300d94c25b3..05f366379f53 100644 --- a/arch/sparc/include/asm/cpudata_32.h +++ b/arch/sparc/include/asm/cpudata_32.h @@ -26,6 +26,6 @@ typedef struct { DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) -#define local_cpu_data() __get_cpu_var(__cpu_data) +#define local_cpu_data() (*this_cpu_ptr(&__cpu_data)) #endif /* _SPARC_CPUDATA_H */ diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 0e594076912c..a6e424d185d0 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -30,7 +30,7 @@ typedef struct { DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) -#define local_cpu_data() __get_cpu_var(__cpu_data) +#define local_cpu_data() (*this_cpu_ptr(&__cpu_data)) #endif /* !(__ASSEMBLY__) */ diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c index 98d712843413..cd83be527586 100644 --- a/arch/sparc/kernel/kprobes.c +++ b/arch/sparc/kernel/kprobes.c @@ -83,7 +83,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; 
kcb->kprobe_orig_tnpc = kcb->prev_kprobe.orig_tnpc; kcb->kprobe_orig_tstate_pil = kcb->prev_kprobe.orig_tstate_pil; @@ -92,7 +92,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); kcb->kprobe_orig_tnpc = regs->tnpc; kcb->kprobe_orig_tstate_pil = (regs->tstate & TSTATE_PIL); } @@ -155,7 +155,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ret = 1; goto no_kprobe; } - p = __get_cpu_var(current_kprobe); + p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) goto ss_probe; } diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 018ef11f57df..ea2bad306f93 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -343,7 +343,7 @@ static void leon_ipi_resched(int cpu) void leonsmp_ipi_interrupt(void) { - struct leon_ipi_work *work = &__get_cpu_var(leon_ipi_work); + struct leon_ipi_work *work = this_cpu_ptr(&leon_ipi_work); if (work->single) { work->single = 0; diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 5b1151dcba13..a9973bb4a1b2 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -100,20 +100,20 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); sum = local_cpu_data().irq0_irqs; - if (__get_cpu_var(nmi_touch)) { - __get_cpu_var(nmi_touch) = 0; + if (__this_cpu_read(nmi_touch)) { + __this_cpu_write(nmi_touch, 0); touched = 1; } - if (!touched && __get_cpu_var(last_irq_sum) == sum) { + if (!touched && __this_cpu_read(last_irq_sum) == sum) { __this_cpu_inc(alert_counter); if (__this_cpu_read(alert_counter) == 30 * nmi_hz) die_nmi("BUG: NMI Watchdog detected LOCKUP", regs, panic_on_timeout); } else { - __get_cpu_var(last_irq_sum) = sum; + 
__this_cpu_write(last_irq_sum, sum); __this_cpu_write(alert_counter, 0); } - if (__get_cpu_var(wd_enabled)) { + if (__this_cpu_read(wd_enabled)) { pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable); } @@ -154,7 +154,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) void stop_nmi_watchdog(void *unused) { pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); - __get_cpu_var(wd_enabled) = 0; + __this_cpu_write(wd_enabled, 0); atomic_dec(&nmi_active); } @@ -207,7 +207,7 @@ static int __init check_nmi_watchdog(void) void start_nmi_watchdog(void *unused) { - __get_cpu_var(wd_enabled) = 1; + __this_cpu_write(wd_enabled, 1); atomic_inc(&nmi_active); pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); @@ -218,7 +218,7 @@ void start_nmi_watchdog(void *unused) static void nmi_adjust_hz_one(void *unused) { - if (!__get_cpu_var(wd_enabled)) + if (!__this_cpu_read(wd_enabled)) return; pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index d07f6b29aed8..49d33b178793 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -48,7 +48,7 @@ static int iommu_batch_initialized; /* Interrupts must be disabled. */ static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry) { - struct iommu_batch *p = &__get_cpu_var(iommu_batch); + struct iommu_batch *p = this_cpu_ptr(&iommu_batch); p->dev = dev; p->prot = prot; @@ -94,7 +94,7 @@ static long iommu_batch_flush(struct iommu_batch *p) static inline void iommu_batch_new_entry(unsigned long entry) { - struct iommu_batch *p = &__get_cpu_var(iommu_batch); + struct iommu_batch *p = this_cpu_ptr(&iommu_batch); if (p->entry + p->npages == entry) return; @@ -106,7 +106,7 @@ static inline void iommu_batch_new_entry(unsigned long entry) /* Interrupts must be disabled. 
*/ static inline long iommu_batch_add(u64 phys_page) { - struct iommu_batch *p = &__get_cpu_var(iommu_batch); + struct iommu_batch *p = this_cpu_ptr(&iommu_batch); BUG_ON(p->npages >= PGLIST_NENTS); @@ -120,7 +120,7 @@ static inline long iommu_batch_add(u64 phys_page) /* Interrupts must be disabled. */ static inline long iommu_batch_end(void) { - struct iommu_batch *p = &__get_cpu_var(iommu_batch); + struct iommu_batch *p = this_cpu_ptr(&iommu_batch); BUG_ON(p->npages >= PGLIST_NENTS); diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d35c490a91cb..264049a6cb74 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1013,7 +1013,7 @@ static void update_pcrs_for_enable(struct cpu_hw_events *cpuc) static void sparc_pmu_enable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; if (cpuc->enabled) @@ -1031,7 +1031,7 @@ static void sparc_pmu_enable(struct pmu *pmu) static void sparc_pmu_disable(struct pmu *pmu) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; if (!cpuc->enabled) @@ -1065,7 +1065,7 @@ static int active_event_index(struct cpu_hw_events *cpuc, static void sparc_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx = active_event_index(cpuc, event); if (flags & PERF_EF_RELOAD) { @@ -1080,7 +1080,7 @@ static void sparc_pmu_start(struct perf_event *event, int flags) static void sparc_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx = active_event_index(cpuc, event); if (!(event->hw.state & PERF_HES_STOPPED)) { @@ -1096,7 +1096,7 @@ static void sparc_pmu_stop(struct 
perf_event *event, int flags) static void sparc_pmu_del(struct perf_event *event, int _flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); unsigned long flags; int i; @@ -1133,7 +1133,7 @@ static void sparc_pmu_del(struct perf_event *event, int _flags) static void sparc_pmu_read(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx = active_event_index(cpuc, event); struct hw_perf_event *hwc = &event->hw; @@ -1145,7 +1145,7 @@ static DEFINE_MUTEX(pmc_grab_mutex); static void perf_stop_nmi_watchdog(void *unused) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int i; stop_nmi_watchdog(NULL); @@ -1356,7 +1356,7 @@ static int collect_events(struct perf_event *group, int max_count, static int sparc_pmu_add(struct perf_event *event, int ef_flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int n0, ret = -EAGAIN; unsigned long flags; @@ -1498,7 +1498,7 @@ static int sparc_pmu_event_init(struct perf_event *event) */ static void sparc_pmu_start_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; @@ -1511,7 +1511,7 @@ static void sparc_pmu_start_txn(struct pmu *pmu) */ static void sparc_pmu_cancel_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1524,13 +1524,13 @@ static void sparc_pmu_cancel_txn(struct pmu *pmu) */ static int sparc_pmu_commit_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuc = 
&__get_cpu_var(cpu_hw_events); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int n; if (!sparc_pmu) return -EINVAL; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); n = cpuc->n_events; if (check_excludes(cpuc->event, 0, n)) return -EINVAL; @@ -1601,7 +1601,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, regs = args->regs; - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); /* If the PMU has the TOE IRQ enable bits, we need to do a * dummy write to the %pcr to clear the overflow bits and thus diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index d5c319553fd0..9d98e5002a09 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -204,7 +204,7 @@ static void __init smp4d_ipi_init(void) void sun4d_ipi_interrupt(void) { - struct sun4d_ipi_work *work = &__get_cpu_var(sun4d_ipi_work); + struct sun4d_ipi_work *work = this_cpu_ptr(&sun4d_ipi_work); if (work->single) { work->single = 0; diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 3fddf64c7fc6..59da0c3ea788 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -765,7 +765,7 @@ void setup_sparc64_timer(void) : /* no outputs */ : "r" (pstate)); - sevt = &__get_cpu_var(sparc64_events); + sevt = this_cpu_ptr(&sparc64_events); memcpy(sevt, &sparc64_clockevent, sizeof(*sevt)); sevt->cpumask = cpumask_of(smp_processor_id()); diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index b89aba217e3b..9df2190c097e 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -52,14 +52,14 @@ void flush_tlb_pending(void) void arch_enter_lazy_mmu_mode(void) { - struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + struct tlb_batch *tb = this_cpu_ptr(&tlb_batch); tb->active = 1; } void arch_leave_lazy_mmu_mode(void) { - struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + struct tlb_batch *tb = this_cpu_ptr(&tlb_batch); if 
(tb->tlb_nr) flush_tlb_pending(); -- GitLab From 77422a8fee1b96d2420a8a254f784bab8644ac41 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:55 -0500 Subject: [PATCH 0281/1868] clocksource: Replace __this_cpu_ptr with raw_cpu_ptr One newly introduced __this_cpu_ptr should be raw_cpu_ptr. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- drivers/clocksource/qcom-timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/qcom-timer.c b/drivers/clocksource/qcom-timer.c index 8d115db1e651..098c542e5c53 100644 --- a/drivers/clocksource/qcom-timer.c +++ b/drivers/clocksource/qcom-timer.c @@ -219,7 +219,7 @@ static void __init msm_timer_init(u32 dgt_hz, int sched_bits, int irq, } /* Immediately configure the timer on the boot CPU */ - msm_local_timer_setup(__this_cpu_ptr(msm_evt)); + msm_local_timer_setup(raw_cpu_ptr(msm_evt)); } err: -- GitLab From 47405a253da4d8ca4b18ad537423083fdd790440 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:56 -0500 Subject: [PATCH 0282/1868] percpu: Remove __this_cpu_ptr The __this_cpu_ptr macro is no longer in use so drop it. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/percpu-defs.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index cfd56046ecec..420032d41d27 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -257,9 +257,6 @@ do { \ #define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) -/* keep until we have removed all uses of __this_cpu_ptr */ -#define __this_cpu_ptr(ptr) raw_cpu_ptr(ptr) - /* * Must be an lvalue. Since @var must be a simple identifier, * we force a syntax error here if it isn't. 
-- GitLab From eb571eeade2598635f813b3284d02c13a380301e Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Wed, 2 Jul 2014 15:35:16 -0400 Subject: [PATCH 0283/1868] block,scsi: verify return pointer from blk_get_request The blk-core dead queue checks introduce an error scenario to blk_get_request that returns NULL if the request queue has been shut down. This affects the behavior for __GFP_WAIT callers, who should verify the return value before dereferencing. Signed-off-by: Joe Lawrence Acked-by: Jiri Kosina [for pktcdvd] Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/scsi_ioctl.c | 9 ++++++++- drivers/block/paride/pd.c | 2 ++ drivers/block/pktcdvd.c | 2 ++ drivers/scsi/scsi_error.c | 2 ++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 51bf5155ee75..29d056782833 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -448,6 +448,10 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, } rq = blk_get_request(q, in_len ? 
WRITE : READ, __GFP_WAIT); + if (!rq) { + err = -ENODEV; + goto error_free_buffer; + } cmdlen = COMMAND_SIZE(opcode); @@ -520,8 +524,9 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, } error: - kfree(buffer); blk_put_request(rq); +error_free_buffer: + kfree(buffer); return err; } EXPORT_SYMBOL_GPL(sg_scsi_ioctl); @@ -534,6 +539,8 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, int err; rq = blk_get_request(q, WRITE, __GFP_WAIT); + if (!rq) + return -ENODEV; blk_rq_set_block_pc(rq); rq->timeout = BLK_DEFAULT_SG_TIMEOUT; rq->cmd[0] = cmd; diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index fea7e76a00de..ca831f741d89 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -722,6 +722,8 @@ static int pd_special_command(struct pd_unit *disk, int err = 0; rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT); + if (!rq) + return -ENODEV; rq->cmd_type = REQ_TYPE_SPECIAL; rq->special = func; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 758ac442c5b5..7fa8c80e8982 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -704,6 +704,8 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? 
WRITE : READ, __GFP_WAIT); + if (!rq) + return -ENODEV; blk_rq_set_block_pc(rq); if (cgc->buflen) { diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 5db8454474ee..4c433bf47a06 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1960,6 +1960,8 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) * request becomes available */ req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); + if (!req) + return; blk_rq_set_block_pc(req); -- GitLab From f1217ed09f827e42a49ffa6a5aab672aa6f57a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 13:16:04 +0200 Subject: [PATCH 0284/1868] drm/ttm: move fpfn and lpfn into each placement v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to specify in a more fine-grained way where to place the buffer object. v2: rebased on drm-next, add bochs changes as well Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/ast/ast_drv.h | 2 +- drivers/gpu/drm/ast/ast_ttm.c | 20 +-- drivers/gpu/drm/bochs/bochs.h | 2 +- drivers/gpu/drm/bochs/bochs_mm.c | 20 ++- drivers/gpu/drm/cirrus/cirrus_drv.h | 2 +- drivers/gpu/drm/cirrus/cirrus_ttm.c | 17 ++- drivers/gpu/drm/mgag200/mgag200_drv.h | 2 +- drivers/gpu/drm/mgag200/mgag200_ttm.c | 20 +-- drivers/gpu/drm/nouveau/nouveau_bo.c | 52 +++++-- drivers/gpu/drm/nouveau/nouveau_bo.h | 4 +- drivers/gpu/drm/nouveau/nouveau_ttm.c | 9 +- drivers/gpu/drm/qxl/qxl_drv.h | 2 +- drivers/gpu/drm/qxl/qxl_object.c | 17 ++- drivers/gpu/drm/qxl/qxl_ttm.c | 8 +- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 71 +++++---- drivers/gpu/drm/radeon/radeon_ttm.c | 25 ++-- drivers/gpu/drm/radeon/radeon_uvd.c | 8 +- drivers/gpu/drm/ttm/ttm_bo.c | 93 +++++------- drivers/gpu/drm/ttm/ttm_bo_manager.c | 9 +- drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 136 +++++++++++------- drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c | 22 ++- 
drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 10 +- drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 3 +- include/drm/ttm/ttm_bo_api.h | 40 +++--- include/drm/ttm/ttm_bo_driver.h | 3 +- 26 files changed, 346 insertions(+), 253 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 957d4fabf1e1..cb91c2acc3cb 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -316,7 +316,7 @@ struct ast_bo { struct ttm_placement placement; struct ttm_bo_kmap_obj kmap; struct drm_gem_object gem; - u32 placements[3]; + struct ttm_place placements[3]; int pin_count; }; #define gem_to_ast_bo(gobj) container_of((gobj), struct ast_bo, gem) diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c index b8246227bab0..8008ea0bc76c 100644 --- a/drivers/gpu/drm/ast/ast_ttm.c +++ b/drivers/gpu/drm/ast/ast_ttm.c @@ -293,18 +293,22 @@ void ast_mm_fini(struct ast_private *ast) void ast_ttm_placement(struct ast_bo *bo, int domain) { u32 c = 0; - bo->placement.fpfn = 0; - bo->placement.lpfn = 0; + unsigned i; + bo->placement.placement = bo->placements; bo->placement.busy_placement = bo->placements; if (domain & TTM_PL_FLAG_VRAM) - bo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; + bo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; if (domain & TTM_PL_FLAG_SYSTEM) - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; if (!c) - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; bo->placement.num_placement = c; bo->placement.num_busy_placement = c; + for (i = 0; i < c; ++i) { + bo->placements[i].fpfn = 0; + bo->placements[i].lpfn = 0; + } } int ast_bo_create(struct drm_device *dev, int size, int align, @@ -360,7 +364,7 @@ int ast_bo_pin(struct ast_bo *bo, u32 pl_flag, u64 *gpu_addr) 
ast_ttm_placement(bo, pl_flag); for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; @@ -383,7 +387,7 @@ int ast_bo_unpin(struct ast_bo *bo) return 0; for (i = 0; i < bo->placement.num_placement ; i++) - bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; @@ -407,7 +411,7 @@ int ast_bo_push_sysram(struct ast_bo *bo) ast_ttm_placement(bo, TTM_PL_FLAG_SYSTEM); for (i = 0; i < bo->placement.num_placement ; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) { diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h index 7eb52dd44b01..4f6e7b3a3635 100644 --- a/drivers/gpu/drm/bochs/bochs.h +++ b/drivers/gpu/drm/bochs/bochs.h @@ -99,7 +99,7 @@ struct bochs_bo { struct ttm_placement placement; struct ttm_bo_kmap_obj kmap; struct drm_gem_object gem; - u32 placements[3]; + struct ttm_place placements[3]; int pin_count; }; diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c index 1728a1b0b813..2af30e7607d7 100644 --- a/drivers/gpu/drm/bochs/bochs_mm.c +++ b/drivers/gpu/drm/bochs/bochs_mm.c @@ -257,20 +257,26 @@ void bochs_mm_fini(struct bochs_device *bochs) static void bochs_ttm_placement(struct bochs_bo *bo, int domain) { + unsigned i; u32 c = 0; - bo->placement.fpfn = 0; - bo->placement.lpfn = 0; bo->placement.placement = bo->placements; bo->placement.busy_placement = bo->placements; if (domain & TTM_PL_FLAG_VRAM) { - bo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED + bo->placements[c++].flags = TTM_PL_FLAG_WC + | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; } if (domain & TTM_PL_FLAG_SYSTEM) { - 
bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING + | TTM_PL_FLAG_SYSTEM; } if (!c) { - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING + | TTM_PL_FLAG_SYSTEM; + } + for (i = 0; i < c; ++i) { + bo->placements[i].fpfn = 0; + bo->placements[i].lpfn = 0; } bo->placement.num_placement = c; bo->placement.num_busy_placement = c; @@ -294,7 +300,7 @@ int bochs_bo_pin(struct bochs_bo *bo, u32 pl_flag, u64 *gpu_addr) bochs_ttm_placement(bo, pl_flag); for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; @@ -319,7 +325,7 @@ int bochs_bo_unpin(struct bochs_bo *bo) return 0; for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.h b/drivers/gpu/drm/cirrus/cirrus_drv.h index 401c890b6c6a..dd2cfc9024aa 100644 --- a/drivers/gpu/drm/cirrus/cirrus_drv.h +++ b/drivers/gpu/drm/cirrus/cirrus_drv.h @@ -163,7 +163,7 @@ struct cirrus_bo { struct ttm_placement placement; struct ttm_bo_kmap_obj kmap; struct drm_gem_object gem; - u32 placements[3]; + struct ttm_place placements[3]; int pin_count; }; #define gem_to_cirrus_bo(gobj) container_of((gobj), struct cirrus_bo, gem) diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c index 92e6b7786097..3e7d758330a9 100644 --- a/drivers/gpu/drm/cirrus/cirrus_ttm.c +++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c @@ -298,18 +298,21 @@ void cirrus_mm_fini(struct cirrus_device *cirrus) void cirrus_ttm_placement(struct cirrus_bo *bo, int domain) { u32 c = 0; - bo->placement.fpfn = 0; - bo->placement.lpfn = 0; 
+ unsigned i; bo->placement.placement = bo->placements; bo->placement.busy_placement = bo->placements; if (domain & TTM_PL_FLAG_VRAM) - bo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; + bo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; if (domain & TTM_PL_FLAG_SYSTEM) - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; if (!c) - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; bo->placement.num_placement = c; bo->placement.num_busy_placement = c; + for (i = 0; i < c; ++i) { + bo->placements[i].fpfn = 0; + bo->placements[i].lpfn = 0; + } } int cirrus_bo_create(struct drm_device *dev, int size, int align, @@ -365,7 +368,7 @@ int cirrus_bo_pin(struct cirrus_bo *bo, u32 pl_flag, u64 *gpu_addr) cirrus_ttm_placement(bo, pl_flag); for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; @@ -392,7 +395,7 @@ int cirrus_bo_push_sysram(struct cirrus_bo *bo) cirrus_ttm_placement(bo, TTM_PL_FLAG_SYSTEM); for (i = 0; i < bo->placement.num_placement ; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) { diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h index 80de23d9b9c9..2e2b76aa4e17 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.h +++ b/drivers/gpu/drm/mgag200/mgag200_drv.h @@ -224,7 +224,7 @@ struct mgag200_bo { struct ttm_placement placement; struct ttm_bo_kmap_obj kmap; struct drm_gem_object gem; - u32 placements[3]; + struct ttm_place placements[3]; int pin_count; }; #define gem_to_mga_bo(gobj) 
container_of((gobj), struct mgag200_bo, gem) diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c index 5a00e90696de..be883ef5a1d3 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ttm.c +++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c @@ -293,18 +293,22 @@ void mgag200_mm_fini(struct mga_device *mdev) void mgag200_ttm_placement(struct mgag200_bo *bo, int domain) { u32 c = 0; - bo->placement.fpfn = 0; - bo->placement.lpfn = 0; + unsigned i; + bo->placement.placement = bo->placements; bo->placement.busy_placement = bo->placements; if (domain & TTM_PL_FLAG_VRAM) - bo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; + bo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; if (domain & TTM_PL_FLAG_SYSTEM) - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; if (!c) - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; bo->placement.num_placement = c; bo->placement.num_busy_placement = c; + for (i = 0; i < c; ++i) { + bo->placements[i].fpfn = 0; + bo->placements[i].lpfn = 0; + } } int mgag200_bo_create(struct drm_device *dev, int size, int align, @@ -361,7 +365,7 @@ int mgag200_bo_pin(struct mgag200_bo *bo, u32 pl_flag, u64 *gpu_addr) mgag200_ttm_placement(bo, pl_flag); for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; @@ -384,7 +388,7 @@ int mgag200_bo_unpin(struct mgag200_bo *bo) return 0; for (i = 0; i < bo->placement.num_placement ; i++) - bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; @@ -408,7 +412,7 @@ int 
mgag200_bo_push_sysram(struct mgag200_bo *bo) mgag200_ttm_placement(bo, TTM_PL_FLAG_SYSTEM); for (i = 0; i < bo->placement.num_placement ; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) { diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 01da508625f2..0591ca0734e3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -241,16 +241,16 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, } static void -set_placement_list(uint32_t *pl, unsigned *n, uint32_t type, uint32_t flags) +set_placement_list(struct ttm_place *pl, unsigned *n, uint32_t type, uint32_t flags) { *n = 0; if (type & TTM_PL_FLAG_VRAM) - pl[(*n)++] = TTM_PL_FLAG_VRAM | flags; + pl[(*n)++].flags = TTM_PL_FLAG_VRAM | flags; if (type & TTM_PL_FLAG_TT) - pl[(*n)++] = TTM_PL_FLAG_TT | flags; + pl[(*n)++].flags = TTM_PL_FLAG_TT | flags; if (type & TTM_PL_FLAG_SYSTEM) - pl[(*n)++] = TTM_PL_FLAG_SYSTEM | flags; + pl[(*n)++].flags = TTM_PL_FLAG_SYSTEM | flags; } static void @@ -258,6 +258,7 @@ set_placement_range(struct nouveau_bo *nvbo, uint32_t type) { struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); u32 vram_pages = drm->device.info.ram_size >> PAGE_SHIFT; + unsigned i, fpfn, lpfn; if (drm->device.info.family == NV_DEVICE_INFO_V0_CELSIUS && nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM) && @@ -269,11 +270,19 @@ set_placement_range(struct nouveau_bo *nvbo, uint32_t type) * at the same time. 
*/ if (nvbo->tile_flags & NOUVEAU_GEM_TILE_ZETA) { - nvbo->placement.fpfn = vram_pages / 2; - nvbo->placement.lpfn = ~0; + fpfn = vram_pages / 2; + lpfn = ~0; } else { - nvbo->placement.fpfn = 0; - nvbo->placement.lpfn = vram_pages / 2; + fpfn = 0; + lpfn = vram_pages / 2; + } + for (i = 0; i < nvbo->placement.num_placement; ++i) { + nvbo->placements[i].fpfn = fpfn; + nvbo->placements[i].lpfn = lpfn; + } + for (i = 0; i < nvbo->placement.num_busy_placement; ++i) { + nvbo->busy_placements[i].fpfn = fpfn; + nvbo->busy_placements[i].lpfn = lpfn; } } } @@ -1041,12 +1050,15 @@ static int nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { - u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING; + struct ttm_place placement_memtype = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING + }; struct ttm_placement placement; struct ttm_mem_reg tmp_mem; int ret; - placement.fpfn = placement.lpfn = 0; placement.num_placement = placement.num_busy_placement = 1; placement.placement = placement.busy_placement = &placement_memtype; @@ -1074,12 +1086,15 @@ static int nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { - u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING; + struct ttm_place placement_memtype = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING + }; struct ttm_placement placement; struct ttm_mem_reg tmp_mem; int ret; - placement.fpfn = placement.lpfn = 0; placement.num_placement = placement.num_busy_placement = 1; placement.placement = placement.busy_placement = &placement_memtype; @@ -1294,7 +1309,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo) struct nouveau_bo *nvbo = nouveau_bo(bo); struct nvif_device *device = &drm->device; u32 mappable = nv_device_resource_len(nvkm_device(device), 1) >> PAGE_SHIFT; - int ret; + int i, ret; /* as long 
as the bo isn't in vram, and isn't tiled, we've got * nothing to do here. @@ -1319,9 +1334,16 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo) bo->mem.start + bo->mem.num_pages < mappable) return 0; + for (i = 0; i < nvbo->placement.num_placement; ++i) { + nvbo->placements[i].fpfn = 0; + nvbo->placements[i].lpfn = mappable; + } + + for (i = 0; i < nvbo->placement.num_busy_placement; ++i) { + nvbo->busy_placements[i].fpfn = 0; + nvbo->busy_placements[i].lpfn = mappable; + } - nvbo->placement.fpfn = 0; - nvbo->placement.lpfn = mappable; nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_VRAM, 0); return nouveau_bo_validate(nvbo, false, false); } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index ff17c1f432fc..4ef88e84a694 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -9,8 +9,8 @@ struct nouveau_bo { struct ttm_buffer_object bo; struct ttm_placement placement; u32 valid_domains; - u32 placements[3]; - u32 busy_placements[3]; + struct ttm_place placements[3]; + struct ttm_place busy_placements[3]; struct ttm_bo_kmap_obj kmap; struct list_head head; diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 53874b76b031..e81d086577ce 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -71,8 +71,7 @@ nouveau_vram_manager_del(struct ttm_mem_type_manager *man, static int nouveau_vram_manager_new(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, - struct ttm_placement *placement, - uint32_t flags, + const struct ttm_place *place, struct ttm_mem_reg *mem) { struct nouveau_drm *drm = nouveau_bdev(man->bdev); @@ -158,8 +157,7 @@ nouveau_gart_manager_del(struct ttm_mem_type_manager *man, static int nouveau_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, - struct ttm_placement *placement, - uint32_t flags, + const struct ttm_place *place, struct 
ttm_mem_reg *mem) { struct nouveau_drm *drm = nouveau_bdev(bo->bdev); @@ -239,8 +237,7 @@ nv04_gart_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem) static int nv04_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, - struct ttm_placement *placement, - uint32_t flags, + const struct ttm_place *place, struct ttm_mem_reg *mem) { struct nouveau_mem *node; diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 36ed40ba773f..f6022b703645 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -106,7 +106,7 @@ struct qxl_bo { /* Protected by gem.mutex */ struct list_head list; /* Protected by tbo.reserved */ - u32 placements[3]; + struct ttm_place placements[3]; struct ttm_placement placement; struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c index b95f144f0b49..adad12d30372 100644 --- a/drivers/gpu/drm/qxl/qxl_object.c +++ b/drivers/gpu/drm/qxl/qxl_object.c @@ -55,21 +55,24 @@ void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned) { u32 c = 0; u32 pflag = pinned ? 
TTM_PL_FLAG_NO_EVICT : 0; + unsigned i; - qbo->placement.fpfn = 0; - qbo->placement.lpfn = 0; qbo->placement.placement = qbo->placements; qbo->placement.busy_placement = qbo->placements; if (domain == QXL_GEM_DOMAIN_VRAM) - qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM | pflag; + qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM | pflag; if (domain == QXL_GEM_DOMAIN_SURFACE) - qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0 | pflag; + qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0 | pflag; if (domain == QXL_GEM_DOMAIN_CPU) - qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM | pflag; + qbo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM | pflag; if (!c) - qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + qbo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; qbo->placement.num_placement = c; qbo->placement.num_busy_placement = c; + for (i = 0; i < c; ++i) { + qbo->placements[i].fpfn = 0; + qbo->placements[i].lpfn = 0; + } } @@ -259,7 +262,7 @@ int qxl_bo_unpin(struct qxl_bo *bo) if (bo->pin_count) return 0; for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (unlikely(r != 0)) dev_err(qdev->dev, "%p validate failed for unpin\n", bo); diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 71a1baeac14e..f66c59b222f1 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -188,11 +188,13 @@ static void qxl_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *placement) { struct qxl_bo *qbo; - static u32 placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + static struct ttm_place placements = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM + }; if (!qxl_ttm_bo_is_qxl_bo(bo)) { - 
placement->fpfn = 0; - placement->lpfn = 0; placement->placement = &placements; placement->busy_placement = &placements; placement->num_placement = 1; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b321ad4dcafd..bb01dab513dd 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -469,7 +469,7 @@ struct radeon_bo { struct list_head list; /* Protected by tbo.reserved */ u32 initial_domain; - u32 placements[3]; + struct ttm_place placements[3]; struct ttm_placement placement; struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 287523807989..0129c7efae3b 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -97,40 +97,56 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) { u32 c = 0, i; - rbo->placement.fpfn = 0; - rbo->placement.lpfn = 0; rbo->placement.placement = rbo->placements; rbo->placement.busy_placement = rbo->placements; if (domain & RADEON_GEM_DOMAIN_VRAM) - rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | - TTM_PL_FLAG_VRAM; + rbo->placements[c++].flags = TTM_PL_FLAG_WC | + TTM_PL_FLAG_UNCACHED | + TTM_PL_FLAG_VRAM; + if (domain & RADEON_GEM_DOMAIN_GTT) { if (rbo->flags & RADEON_GEM_GTT_UC) { - rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_TT; + rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | + TTM_PL_FLAG_TT; + } else if ((rbo->flags & RADEON_GEM_GTT_WC) || (rbo->rdev->flags & RADEON_IS_AGP)) { - rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | + rbo->placements[c++].flags = TTM_PL_FLAG_WC | + TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_TT; } else { - rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT; + rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | + TTM_PL_FLAG_TT; } } + if (domain & RADEON_GEM_DOMAIN_CPU) { if (rbo->flags & RADEON_GEM_GTT_UC) { - 
rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_SYSTEM; + rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | + TTM_PL_FLAG_SYSTEM; + } else if ((rbo->flags & RADEON_GEM_GTT_WC) || rbo->rdev->flags & RADEON_IS_AGP) { - rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | + rbo->placements[c++].flags = TTM_PL_FLAG_WC | + TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_SYSTEM; } else { - rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM; + rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | + TTM_PL_FLAG_SYSTEM; } } if (!c) - rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + rbo->placements[c++].flags = TTM_PL_MASK_CACHING | + TTM_PL_FLAG_SYSTEM; + rbo->placement.num_placement = c; rbo->placement.num_busy_placement = c; + for (i = 0; i < c; ++i) { + rbo->placements[i].fpfn = 0; + rbo->placements[i].lpfn = 0; + } + /* * Use two-ended allocation depending on the buffer size to * improve fragmentation quality. @@ -138,7 +154,7 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) */ if (rbo->tbo.mem.size > 512 * 1024) { for (i = 0; i < c; i++) { - rbo->placements[i] |= TTM_PL_FLAG_TOPDOWN; + rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN; } } } @@ -287,21 +303,22 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset, return 0; } radeon_ttm_placement_from_domain(bo, domain); - if (domain == RADEON_GEM_DOMAIN_VRAM) { + for (i = 0; i < bo->placement.num_placement; i++) { + unsigned lpfn = 0; + /* force to pin into visible video ram */ - bo->placement.lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT; - } - if (max_offset) { - u64 lpfn = max_offset >> PAGE_SHIFT; + if (bo->placements[i].flags & TTM_PL_FLAG_VRAM) + lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT; + else + lpfn = bo->rdev->mc.gtt_size >> PAGE_SHIFT; /* ??? 
*/ - if (!bo->placement.lpfn) - bo->placement.lpfn = bo->rdev->mc.gtt_size >> PAGE_SHIFT; + if (max_offset) + lpfn = min (lpfn, (unsigned)(max_offset >> PAGE_SHIFT)); - if (lpfn < bo->placement.lpfn) - bo->placement.lpfn = lpfn; + bo->placements[i].lpfn = lpfn; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; } - for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (likely(r == 0)) { bo->pin_count = 1; @@ -333,8 +350,10 @@ int radeon_bo_unpin(struct radeon_bo *bo) bo->pin_count--; if (bo->pin_count) return 0; - for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; + for (i = 0; i < bo->placement.num_placement; i++) { + bo->placements[i].lpfn = 0; + bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; + } r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (likely(r == 0)) { if (bo->tbo.mem.mem_type == TTM_PL_VRAM) @@ -735,7 +754,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* hurrah the memory is not visible ! 
*/ radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM); - rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; + rbo->placements[0].lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; r = ttm_bo_validate(bo, &rbo->placement, false, false); if (unlikely(r == -ENOMEM)) { radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 12e37b1ddc40..822eb3630045 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -178,12 +178,15 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, static void radeon_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *placement) { + static struct ttm_place placements = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM + }; + struct radeon_bo *rbo; - static u32 placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; if (!radeon_ttm_bo_is_radeon_bo(bo)) { - placement->fpfn = 0; - placement->lpfn = 0; placement->placement = &placements; placement->busy_placement = &placements; placement->num_placement = 1; @@ -286,20 +289,20 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, struct radeon_device *rdev; struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; - u32 placements; + struct ttm_place placements; struct ttm_placement placement; int r; rdev = radeon_get_rdev(bo->bdev); tmp_mem = *new_mem; tmp_mem.mm_node = NULL; - placement.fpfn = 0; - placement.lpfn = 0; placement.num_placement = 1; placement.placement = &placements; placement.num_busy_placement = 1; placement.busy_placement = &placements; - placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; + placements.fpfn = 0; + placements.lpfn = 0; + placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_gpu); if (unlikely(r)) { @@ -334,19 +337,19 @@ static int 
radeon_move_ram_vram(struct ttm_buffer_object *bo, struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; struct ttm_placement placement; - u32 placements; + struct ttm_place placements; int r; rdev = radeon_get_rdev(bo->bdev); tmp_mem = *new_mem; tmp_mem.mm_node = NULL; - placement.fpfn = 0; - placement.lpfn = 0; placement.num_placement = 1; placement.placement = &placements; placement.num_busy_placement = 1; placement.busy_placement = &placements; - placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; + placements.fpfn = 0; + placements.lpfn = 0; + placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_gpu); if (unlikely(r)) { diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 341848a14376..25c8a1fd152c 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -233,8 +233,12 @@ int radeon_uvd_resume(struct radeon_device *rdev) void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) { - rbo->placement.fpfn = 0 >> PAGE_SHIFT; - rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; + int i; + + for (i = 0; i < rbo->placement.num_placement; ++i) { + rbo->placements[i].fpfn = 0 >> PAGE_SHIFT; + rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; + } } void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3da89d5dab60..b992ec3c318a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -53,12 +53,13 @@ static struct attribute ttm_bo_count = { .mode = S_IRUGO }; -static inline int ttm_mem_type_from_flags(uint32_t flags, uint32_t *mem_type) +static inline int ttm_mem_type_from_place(const struct ttm_place *place, + uint32_t *mem_type) { int i; for (i = 0; i <= TTM_PL_PRIV5; i++) - if (flags & (1 << i)) { + if (place->flags & (1 << i)) { *mem_type = i; return 0; } @@ 
-89,12 +90,12 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo, bo, bo->mem.num_pages, bo->mem.size >> 10, bo->mem.size >> 20); for (i = 0; i < placement->num_placement; i++) { - ret = ttm_mem_type_from_flags(placement->placement[i], + ret = ttm_mem_type_from_place(&placement->placement[i], &mem_type); if (ret) return; pr_err(" placement[%d]=0x%08X (%d)\n", - i, placement->placement[i], mem_type); + i, placement->placement[i].flags, mem_type); ttm_mem_type_debug(bo->bdev, mem_type); } } @@ -685,8 +686,6 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, evict_mem.bus.io_reserved_vm = false; evict_mem.bus.io_reserved_count = 0; - placement.fpfn = 0; - placement.lpfn = 0; placement.num_placement = 0; placement.num_busy_placement = 0; bdev->driver->evict_flags(bo, &placement); @@ -774,7 +773,7 @@ EXPORT_SYMBOL(ttm_bo_mem_put); */ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo, uint32_t mem_type, - struct ttm_placement *placement, + const struct ttm_place *place, struct ttm_mem_reg *mem, bool interruptible, bool no_wait_gpu) @@ -784,7 +783,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo, int ret; do { - ret = (*man->func->get_node)(man, bo, placement, 0, mem); + ret = (*man->func->get_node)(man, bo, place, mem); if (unlikely(ret != 0)) return ret; if (mem->mm_node) @@ -827,18 +826,18 @@ static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man, static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man, uint32_t mem_type, - uint32_t proposed_placement, + const struct ttm_place *place, uint32_t *masked_placement) { uint32_t cur_flags = ttm_bo_type_flags(mem_type); - if ((cur_flags & proposed_placement & TTM_PL_MASK_MEM) == 0) + if ((cur_flags & place->flags & TTM_PL_MASK_MEM) == 0) return false; - if ((proposed_placement & man->available_caching) == 0) + if ((place->flags & man->available_caching) == 0) return false; - cur_flags |= (proposed_placement & 
man->available_caching); + cur_flags |= (place->flags & man->available_caching); *masked_placement = cur_flags; return true; @@ -869,15 +868,14 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, mem->mm_node = NULL; for (i = 0; i < placement->num_placement; ++i) { - ret = ttm_mem_type_from_flags(placement->placement[i], - &mem_type); + const struct ttm_place *place = &placement->placement[i]; + + ret = ttm_mem_type_from_place(place, &mem_type); if (ret) return ret; man = &bdev->man[mem_type]; - type_ok = ttm_bo_mt_compatible(man, - mem_type, - placement->placement[i], + type_ok = ttm_bo_mt_compatible(man, mem_type, place, &cur_flags); if (!type_ok) @@ -889,7 +887,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, * Use the access and other non-mapping-related flag bits from * the memory placement flags to the current flags */ - ttm_flag_masked(&cur_flags, placement->placement[i], + ttm_flag_masked(&cur_flags, place->flags, ~TTM_PL_MASK_MEMTYPE); if (mem_type == TTM_PL_SYSTEM) @@ -897,8 +895,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (man->has_type && man->use_type) { type_found = true; - ret = (*man->func->get_node)(man, bo, placement, - cur_flags, mem); + ret = (*man->func->get_node)(man, bo, place, mem); if (unlikely(ret)) return ret; } @@ -916,17 +913,15 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, return -EINVAL; for (i = 0; i < placement->num_busy_placement; ++i) { - ret = ttm_mem_type_from_flags(placement->busy_placement[i], - &mem_type); + const struct ttm_place *place = &placement->busy_placement[i]; + + ret = ttm_mem_type_from_place(place, &mem_type); if (ret) return ret; man = &bdev->man[mem_type]; if (!man->has_type) continue; - if (!ttm_bo_mt_compatible(man, - mem_type, - placement->busy_placement[i], - &cur_flags)) + if (!ttm_bo_mt_compatible(man, mem_type, place, &cur_flags)) continue; cur_flags = ttm_bo_select_caching(man, bo->mem.placement, @@ -935,7 +930,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, * Use 
the access and other non-mapping-related flag bits from * the memory placement flags to the current flags */ - ttm_flag_masked(&cur_flags, placement->busy_placement[i], + ttm_flag_masked(&cur_flags, place->flags, ~TTM_PL_MASK_MEMTYPE); if (mem_type == TTM_PL_SYSTEM) { @@ -945,7 +940,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, return 0; } - ret = ttm_bo_mem_force_space(bo, mem_type, placement, mem, + ret = ttm_bo_mem_force_space(bo, mem_type, place, mem, interruptible, no_wait_gpu); if (ret == 0 && mem->mm_node) { mem->placement = cur_flags; @@ -1006,20 +1001,27 @@ static bool ttm_bo_mem_compat(struct ttm_placement *placement, { int i; - if (mem->mm_node && placement->lpfn != 0 && - (mem->start < placement->fpfn || - mem->start + mem->num_pages > placement->lpfn)) - return false; - for (i = 0; i < placement->num_placement; i++) { - *new_flags = placement->placement[i]; + const struct ttm_place *heap = &placement->placement[i]; + if (mem->mm_node && heap->lpfn != 0 && + (mem->start < heap->fpfn || + mem->start + mem->num_pages > heap->lpfn)) + continue; + + *new_flags = heap->flags; if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) && (*new_flags & mem->placement & TTM_PL_MASK_MEM)) return true; } for (i = 0; i < placement->num_busy_placement; i++) { - *new_flags = placement->busy_placement[i]; + const struct ttm_place *heap = &placement->busy_placement[i]; + if (mem->mm_node && heap->lpfn != 0 && + (mem->start < heap->fpfn || + mem->start + mem->num_pages > heap->lpfn)) + continue; + + *new_flags = heap->flags; if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) && (*new_flags & mem->placement & TTM_PL_MASK_MEM)) return true; @@ -1037,11 +1039,6 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, uint32_t new_flags; lockdep_assert_held(&bo->resv->lock.base); - /* Check that range is valid */ - if (placement->lpfn || placement->fpfn) - if (placement->fpfn > placement->lpfn || - (placement->lpfn - placement->fpfn) < bo->num_pages) - return 
-EINVAL; /* * Check whether we need to move buffer. */ @@ -1070,15 +1067,6 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_validate); -int ttm_bo_check_placement(struct ttm_buffer_object *bo, - struct ttm_placement *placement) -{ - BUG_ON((placement->fpfn || placement->lpfn) && - (bo->mem.num_pages > (placement->lpfn - placement->fpfn))); - - return 0; -} - int ttm_bo_init(struct ttm_bo_device *bdev, struct ttm_buffer_object *bo, unsigned long size, @@ -1147,15 +1135,12 @@ int ttm_bo_init(struct ttm_bo_device *bdev, atomic_inc(&bo->glob->bo_count); drm_vma_node_reset(&bo->vma_node); - ret = ttm_bo_check_placement(bo, placement); - /* * For ttm_bo_type_device buffers, allocate * address space from the device. */ - if (likely(!ret) && - (bo->type == ttm_bo_type_device || - bo->type == ttm_bo_type_sg)) + if (bo->type == ttm_bo_type_device || + bo->type == ttm_bo_type_sg) ret = drm_vma_offset_add(&bdev->vma_manager, &bo->vma_node, bo->mem.num_pages); diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c index 9e103a4875c8..964387fc5c8f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_manager.c +++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c @@ -49,8 +49,7 @@ struct ttm_range_manager { static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, - struct ttm_placement *placement, - uint32_t flags, + const struct ttm_place *place, struct ttm_mem_reg *mem) { struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv; @@ -60,7 +59,7 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man, unsigned long lpfn; int ret; - lpfn = placement->lpfn; + lpfn = place->lpfn; if (!lpfn) lpfn = man->size; @@ -68,13 +67,13 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man, if (!node) return -ENOMEM; - if (flags & TTM_PL_FLAG_TOPDOWN) + if (place->flags & TTM_PL_FLAG_TOPDOWN) aflags = DRM_MM_CREATE_TOP; spin_lock(&rman->lock); ret = 
drm_mm_insert_node_in_range_generic(mm, node, mem->num_pages, mem->page_alignment, 0, - placement->fpfn, lpfn, + place->fpfn, lpfn, DRM_MM_SEARCH_BEST, aflags); spin_unlock(&rman->lock); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c index 6327cfc36805..37c093c0c7b8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c @@ -30,66 +30,101 @@ #include #include -static uint32_t vram_placement_flags = TTM_PL_FLAG_VRAM | - TTM_PL_FLAG_CACHED; - -static uint32_t vram_ne_placement_flags = TTM_PL_FLAG_VRAM | - TTM_PL_FLAG_CACHED | - TTM_PL_FLAG_NO_EVICT; +static struct ttm_place vram_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED +}; -static uint32_t sys_placement_flags = TTM_PL_FLAG_SYSTEM | - TTM_PL_FLAG_CACHED; +static struct ttm_place vram_ne_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT +}; -static uint32_t sys_ne_placement_flags = TTM_PL_FLAG_SYSTEM | - TTM_PL_FLAG_CACHED | - TTM_PL_FLAG_NO_EVICT; +static struct ttm_place sys_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED +}; -static uint32_t gmr_placement_flags = VMW_PL_FLAG_GMR | - TTM_PL_FLAG_CACHED; +static struct ttm_place sys_ne_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT +}; -static uint32_t gmr_ne_placement_flags = VMW_PL_FLAG_GMR | - TTM_PL_FLAG_CACHED | - TTM_PL_FLAG_NO_EVICT; +static struct ttm_place gmr_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED +}; -static uint32_t mob_placement_flags = VMW_PL_FLAG_MOB | - TTM_PL_FLAG_CACHED; +static struct ttm_place gmr_ne_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT +}; -struct ttm_placement vmw_vram_placement = { 
+static struct ttm_place mob_placement_flags = { .fpfn = 0, .lpfn = 0, + .flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED +}; + +struct ttm_placement vmw_vram_placement = { .num_placement = 1, .placement = &vram_placement_flags, .num_busy_placement = 1, .busy_placement = &vram_placement_flags }; -static uint32_t vram_gmr_placement_flags[] = { - TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED, - VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED +static struct ttm_place vram_gmr_placement_flags[] = { + { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED + } }; -static uint32_t gmr_vram_placement_flags[] = { - VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED, - TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED +static struct ttm_place gmr_vram_placement_flags[] = { + { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED + } }; struct ttm_placement vmw_vram_gmr_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 2, .placement = vram_gmr_placement_flags, .num_busy_placement = 1, .busy_placement = &gmr_placement_flags }; -static uint32_t vram_gmr_ne_placement_flags[] = { - TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT, - VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT +static struct ttm_place vram_gmr_ne_placement_flags[] = { + { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | + TTM_PL_FLAG_NO_EVICT + }, { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | + TTM_PL_FLAG_NO_EVICT + } }; struct ttm_placement vmw_vram_gmr_ne_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 2, .placement = vram_gmr_ne_placement_flags, .num_busy_placement = 1, @@ -97,8 +132,6 @@ struct ttm_placement vmw_vram_gmr_ne_placement = { }; struct ttm_placement vmw_vram_sys_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 1, .placement = 
&vram_placement_flags, .num_busy_placement = 1, @@ -106,8 +139,6 @@ struct ttm_placement vmw_vram_sys_placement = { }; struct ttm_placement vmw_vram_ne_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 1, .placement = &vram_ne_placement_flags, .num_busy_placement = 1, @@ -115,8 +146,6 @@ struct ttm_placement vmw_vram_ne_placement = { }; struct ttm_placement vmw_sys_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 1, .placement = &sys_placement_flags, .num_busy_placement = 1, @@ -124,24 +153,33 @@ struct ttm_placement vmw_sys_placement = { }; struct ttm_placement vmw_sys_ne_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 1, .placement = &sys_ne_placement_flags, .num_busy_placement = 1, .busy_placement = &sys_ne_placement_flags }; -static uint32_t evictable_placement_flags[] = { - TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED, - TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED, - VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED, - VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED +static struct ttm_place evictable_placement_flags[] = { + { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED + } }; struct ttm_placement vmw_evictable_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 4, .placement = evictable_placement_flags, .num_busy_placement = 1, @@ -149,8 +187,6 @@ struct ttm_placement vmw_evictable_placement = { }; struct ttm_placement vmw_srf_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 1, .num_busy_placement = 2, .placement = &gmr_placement_flags, @@ -158,8 +194,6 @@ struct ttm_placement vmw_srf_placement = { }; struct ttm_placement vmw_mob_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 1, .num_busy_placement = 1, .placement = &mob_placement_flags, diff --git 
a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c index ed1d51006ab1..914b375763dc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c @@ -198,13 +198,19 @@ int vmw_dmabuf_to_start_of_vram(struct vmw_private *dev_priv, { struct ttm_buffer_object *bo = &buf->base; struct ttm_placement placement; + struct ttm_place place; int ret = 0; if (pin) - placement = vmw_vram_ne_placement; + place = vmw_vram_ne_placement.placement[0]; else - placement = vmw_vram_placement; - placement.lpfn = bo->num_pages; + place = vmw_vram_placement.placement[0]; + place.lpfn = bo->num_pages; + + placement.num_placement = 1; + placement.placement = &place; + placement.num_busy_placement = 1; + placement.busy_placement = &place; ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible); if (unlikely(ret != 0)) @@ -293,21 +299,23 @@ void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *bo, */ void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin) { - uint32_t pl_flags; + struct ttm_place pl; struct ttm_placement placement; uint32_t old_mem_type = bo->mem.mem_type; int ret; lockdep_assert_held(&bo->resv->lock.base); - pl_flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | VMW_PL_FLAG_MOB + pl.fpfn = 0; + pl.lpfn = 0; + pl.flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | VMW_PL_FLAG_MOB | TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED; if (pin) - pl_flags |= TTM_PL_FLAG_NO_EVICT; + pl.flags |= TTM_PL_FLAG_NO_EVICT; memset(&placement, 0, sizeof(placement)); placement.num_placement = 1; - placement.placement = &pl_flags; + placement.placement = &pl; ret = ttm_bo_validate(bo, &placement, false, true); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index b031b48dbb3c..0a474f391fad 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -374,10 +374,16 @@ static int vmw_fb_create_bo(struct vmw_private *vmw_priv, size_t size, struct vmw_dma_buffer **out) { 
struct vmw_dma_buffer *vmw_bo; - struct ttm_placement ne_placement = vmw_vram_ne_placement; + struct ttm_place ne_place = vmw_vram_ne_placement.placement[0]; + struct ttm_placement ne_placement; int ret; - ne_placement.lpfn = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + ne_placement.num_placement = 1; + ne_placement.placement = &ne_place; + ne_placement.num_busy_placement = 1; + ne_placement.busy_placement = &ne_place; + + ne_place.lpfn = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; (void) ttm_write_lock(&vmw_priv->reservation_sem, false); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index 26f8bdde3529..170b61be1e4e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -46,8 +46,7 @@ struct vmwgfx_gmrid_man { static int vmw_gmrid_man_get_node(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, - struct ttm_placement *placement, - uint32_t flags, + const struct ttm_place *place, struct ttm_mem_reg *mem) { struct vmwgfx_gmrid_man *gman = diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 7526c5bf5610..e3d39c80a091 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -45,12 +45,24 @@ struct ttm_bo_device; struct drm_mm_node; +/** + * struct ttm_place + * + * @fpfn: first valid page frame number to put the object + * @lpfn: last valid page frame number to put the object + * @flags: memory domain and caching flags for the object + * + * Structure indicating a possible place to put an object. 
+ */ +struct ttm_place { + unsigned fpfn; + unsigned lpfn; + uint32_t flags; +}; /** * struct ttm_placement * - * @fpfn: first valid page frame number to put the object - * @lpfn: last valid page frame number to put the object * @num_placement: number of preferred placements * @placement: preferred placements * @num_busy_placement: number of preferred placements when need to evict buffer @@ -59,12 +71,10 @@ struct drm_mm_node; * Structure indicating the placement you request for an object. */ struct ttm_placement { - unsigned fpfn; - unsigned lpfn; - unsigned num_placement; - const uint32_t *placement; - unsigned num_busy_placement; - const uint32_t *busy_placement; + unsigned num_placement; + const struct ttm_place *placement; + unsigned num_busy_placement; + const struct ttm_place *busy_placement; }; /** @@ -518,20 +528,6 @@ extern int ttm_bo_create(struct ttm_bo_device *bdev, struct file *persistent_swap_storage, struct ttm_buffer_object **p_bo); -/** - * ttm_bo_check_placement - * - * @bo: the buffer object. - * @placement: placements - * - * Performs minimal validity checking on an intended change of - * placement flags. - * Returns - * -EINVAL: Intended change is invalid or not allowed. 
- */ -extern int ttm_bo_check_placement(struct ttm_buffer_object *bo, - struct ttm_placement *placement); - /** * ttm_bo_init_mm * diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 1d9f0f1ff52d..5c8bb5699a6f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -208,8 +208,7 @@ struct ttm_mem_type_manager_func { */ int (*get_node)(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, - struct ttm_placement *placement, - uint32_t flags, + const struct ttm_place *place, struct ttm_mem_reg *mem); /** -- GitLab From 23f66e2d661b4d3226d16e25910a9e9472ce2410 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 27 Aug 2014 11:18:29 -0400 Subject: [PATCH 0285/1868] Revert "powerpc: Replace __get_cpu_var uses" This reverts commit 5828f666c069af74e00db21559f1535103c9f79a due to build failure after merging with pending powerpc changes. Link: http://lkml.kernel.org/g/20140827142243.6277eaff@canb.auug.org.au Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell Cc: Christoph Lameter Cc: Benjamin Herrenschmidt --- arch/powerpc/include/asm/hardirq.h | 4 +--- arch/powerpc/include/asm/tlbflush.h | 4 ++-- arch/powerpc/include/asm/xics.h | 8 +++---- arch/powerpc/kernel/dbell.c | 2 +- arch/powerpc/kernel/hw_breakpoint.c | 6 ++--- arch/powerpc/kernel/iommu.c | 2 +- arch/powerpc/kernel/irq.c | 4 ++-- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/kprobes.c | 6 ++--- arch/powerpc/kernel/mce.c | 24 +++++++++---------- arch/powerpc/kernel/process.c | 10 ++++---- arch/powerpc/kernel/smp.c | 6 ++--- arch/powerpc/kernel/sysfs.c | 4 ++-- arch/powerpc/kernel/time.c | 22 ++++++++--------- arch/powerpc/kernel/traps.c | 8 +++---- arch/powerpc/kvm/e500.c | 14 +++++------ arch/powerpc/kvm/e500mc.c | 4 ++-- arch/powerpc/mm/hash_native_64.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/hugetlbpage-book3e.c | 6 ++--- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/perf/core-book3s.c | 22 
++++++++--------- arch/powerpc/perf/core-fsl-emb.c | 6 ++--- arch/powerpc/platforms/cell/interrupt.c | 6 ++--- .../platforms/powernv/opal-tracepoints.c | 4 ++-- arch/powerpc/platforms/ps3/interrupt.c | 2 +- arch/powerpc/platforms/pseries/dtl.c | 2 +- arch/powerpc/platforms/pseries/hvCall_inst.c | 4 ++-- arch/powerpc/platforms/pseries/iommu.c | 8 +++---- arch/powerpc/platforms/pseries/lpar.c | 6 ++--- arch/powerpc/platforms/pseries/ras.c | 4 ++-- arch/powerpc/sysdev/xics/xics-common.c | 2 +- 32 files changed, 103 insertions(+), 105 deletions(-) diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 8d907ba4fd05..1bbb3013d6aa 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -21,9 +21,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define __ARCH_IRQ_STAT -#define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending) -#define set_softirq_pending(x) __this_cpu_write(irq_stat._softirq_pending, (x)) -#define or_softirq_pending(x) __this_cpu_or(irq_stat._softirq_pending, (x)) +#define local_softirq_pending() __get_cpu_var(irq_stat).__softirq_pending static inline void ack_bad_irq(unsigned int irq) { diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index cd7c2719d3ef..2def01ed0cb2 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -107,14 +107,14 @@ extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); static inline void arch_enter_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); batch->active = 1; } static inline void arch_leave_lazy_mmu_mode(void) { - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); if (batch->index) __flush_tlb_pending(batch); diff --git a/arch/powerpc/include/asm/xics.h 
b/arch/powerpc/include/asm/xics.h index 5007ad0448ce..282d43a0c855 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -97,7 +97,7 @@ DECLARE_PER_CPU(struct xics_cppr, xics_cppr); static inline void xics_push_cppr(unsigned int vec) { - struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) return; @@ -110,7 +110,7 @@ static inline void xics_push_cppr(unsigned int vec) static inline unsigned char xics_pop_cppr(void) { - struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); if (WARN_ON(os_cppr->index < 1)) return LOWEST_PRIORITY; @@ -120,7 +120,7 @@ static inline unsigned char xics_pop_cppr(void) static inline void xics_set_base_cppr(unsigned char cppr) { - struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); /* we only really want to set the priority when there's * just one cppr value on the stack @@ -132,7 +132,7 @@ static inline void xics_set_base_cppr(unsigned char cppr) static inline unsigned char xics_cppr_top(void) { - struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); return os_cppr->stack[os_cppr->index]; } diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index f4217819cc31..d55c76c571f3 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -41,7 +41,7 @@ void doorbell_exception(struct pt_regs *regs) may_hard_irq_enable(); - __this_cpu_inc(irq_stat.doorbell_irqs); + __get_cpu_var(irq_stat).doorbell_irqs++; smp_ipi_demux(); diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index b62f90eaf19e..0bb5918faaaf 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -63,7 +63,7 @@ int hw_breakpoint_slots(int type) int 
arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot = this_cpu_ptr(&bp_per_reg); + struct perf_event **slot = &__get_cpu_var(bp_per_reg); *slot = bp; @@ -88,7 +88,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) */ void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - struct perf_event **slot = this_cpu_ptr(&bp_per_reg); + struct perf_event **slot = &__get_cpu_var(bp_per_reg); if (*slot != bp) { WARN_ONCE(1, "Can't find the breakpoint"); @@ -226,7 +226,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) */ rcu_read_lock(); - bp = __this_cpu_read(bp_per_reg); + bp = __get_cpu_var(bp_per_reg); if (!bp) goto out; info = counter_arch_bp(bp); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 71e60bfb89e2..a10642a0d861 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -208,7 +208,7 @@ static unsigned long iommu_range_alloc(struct device *dev, * We don't need to disable preemption here because any CPU can * safely use any IOMMU pool. 
*/ - pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1); + pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1); if (largealloc) pool = &(tbl->large_pool); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 74d40c6855b8..4c5891de162e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -114,7 +114,7 @@ static inline notrace void set_soft_enabled(unsigned long enable) static inline notrace int decrementer_check_overflow(void) { u64 now = get_tb_or_rtc(); - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); return now >= *next_tb; } @@ -499,7 +499,7 @@ void __do_irq(struct pt_regs *regs) /* And finally process it */ if (unlikely(irq == NO_IRQ)) - __this_cpu_inc(irq_stat.spurious_irqs); + __get_cpu_var(irq_stat).spurious_irqs++; else generic_handle_irq(irq); diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index e77c3ccf8dcf..8504657379f1 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -155,7 +155,7 @@ static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; struct thread_info *backup_current_thread_info = - this_cpu_ptr(&kgdb_thread_info); + &__get_cpu_var(kgdb_thread_info); if (user_mode(regs)) return 0; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 7c053f281406..2f72af82513c 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -119,7 +119,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; kcb->kprobe_status = kcb->prev_kprobe.status; kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr; } @@ -127,7 +127,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk 
*kcb) static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - __this_cpu_write(current_kprobe, p); + __get_cpu_var(current_kprobe) = p; kcb->kprobe_saved_msr = regs->msr; } @@ -192,7 +192,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ret = 1; goto no_kprobe; } - p = __this_cpu_read(current_kprobe); + p = __get_cpu_var(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 15c99b649b04..a7fd4cb78b78 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -73,8 +73,8 @@ void save_mce_event(struct pt_regs *regs, long handled, uint64_t nip, uint64_t addr) { uint64_t srr1; - int index = __this_cpu_inc_return(mce_nest_count); - struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); + int index = __get_cpu_var(mce_nest_count)++; + struct machine_check_event *mce = &__get_cpu_var(mce_event[index]); /* * Return if we don't have enough space to log mce event. @@ -143,7 +143,7 @@ void save_mce_event(struct pt_regs *regs, long handled, */ int get_mce_event(struct machine_check_event *mce, bool release) { - int index = __this_cpu_read(mce_nest_count) - 1; + int index = __get_cpu_var(mce_nest_count) - 1; struct machine_check_event *mc_evt; int ret = 0; @@ -153,7 +153,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) /* Check if we have MCE info to process. */ if (index < MAX_MC_EVT) { - mc_evt = this_cpu_ptr(&mce_event[index]); + mc_evt = &__get_cpu_var(mce_event[index]); /* Copy the event structure and release the original */ if (mce) *mce = *mc_evt; @@ -163,7 +163,7 @@ int get_mce_event(struct machine_check_event *mce, bool release) } /* Decrement the count to free the slot. 
*/ if (release) - __this_cpu_dec(mce_nest_count); + __get_cpu_var(mce_nest_count)--; return ret; } @@ -184,13 +184,13 @@ void machine_check_queue_event(void) if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; - index = __this_cpu_inc_return(mce_queue_count); + index = __get_cpu_var(mce_queue_count)++; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { - __this_cpu_dec(mce_queue_count); + __get_cpu_var(mce_queue_count)--; return; } - memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); + __get_cpu_var(mce_event_queue[index]) = evt; /* Queue irq work to process this event later. */ irq_work_queue(&mce_event_process_work); @@ -208,11 +208,11 @@ static void machine_check_process_queued_event(struct irq_work *work) * For now just print it to console. * TODO: log this error event to FSP or nvram. */ - while (__this_cpu_read(mce_queue_count) > 0) { - index = __this_cpu_read(mce_queue_count) - 1; + while (__get_cpu_var(mce_queue_count) > 0) { + index = __get_cpu_var(mce_queue_count) - 1; machine_check_print_event_info( - this_cpu_ptr(&mce_event_queue[index])); - __this_cpu_dec(mce_queue_count); + &__get_cpu_var(mce_event_queue[index])); + __get_cpu_var(mce_queue_count)--; } } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 2df2f2956520..bf44ae962ab8 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -498,7 +498,7 @@ static inline int set_dawr(struct arch_hw_breakpoint *brk) void __set_breakpoint(struct arch_hw_breakpoint *brk) { - __this_cpu_write(current_brk, *brk); + __get_cpu_var(current_brk) = *brk; if (cpu_has_feature(CPU_FTR_DAWR)) set_dawr(brk); @@ -841,7 +841,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * schedule DABR */ #ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk), &new->thread.hw_brk))) + if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) __set_breakpoint(&new->thread.hw_brk); 
#endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif @@ -855,7 +855,7 @@ struct task_struct *__switch_to(struct task_struct *prev, * Collect processor utilization data per process */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); + struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); long unsigned start_tb, current_tb; start_tb = old_thread->start_tb; cu->current_tb = current_tb = mfspr(SPRN_PURR); @@ -865,7 +865,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC_BOOK3S_64 - batch = this_cpu_ptr(&ppc64_tlb_batch); + batch = &__get_cpu_var(ppc64_tlb_batch); if (batch->active) { current_thread_info()->local_flags |= _TLF_LAZY_MMU; if (batch->index) @@ -888,7 +888,7 @@ struct task_struct *__switch_to(struct task_struct *prev, #ifdef CONFIG_PPC_BOOK3S_64 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; - batch = this_cpu_ptr(&ppc64_tlb_batch); + batch = &__get_cpu_var(ppc64_tlb_batch); batch->active = 1; } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 60391a51467a..a0738af4aba6 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -242,7 +242,7 @@ void smp_muxed_ipi_message_pass(int cpu, int msg) irqreturn_t smp_ipi_demux(void) { - struct cpu_messages *info = this_cpu_ptr(&ipi_message); + struct cpu_messages *info = &__get_cpu_var(ipi_message); unsigned int all; mb(); /* order any irq clear */ @@ -438,9 +438,9 @@ void generic_mach_cpu_die(void) idle_task_exit(); cpu = smp_processor_id(); printk(KERN_DEBUG "CPU%d offline\n", cpu); - __this_cpu_write(cpu_state, CPU_DEAD); + __get_cpu_var(cpu_state) = CPU_DEAD; smp_wmb(); - while (__this_cpu_read(cpu_state) != CPU_UP_PREPARE) + while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) cpu_relax(); } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 
fa1fd8a0c867..67fd2fd2620a 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -394,10 +394,10 @@ void ppc_enable_pmcs(void) ppc_set_pmu_inuse(1); /* Only need to enable them once */ - if (__this_cpu_read(pmcs_enabled)) + if (__get_cpu_var(pmcs_enabled)) return; - __this_cpu_write(pmcs_enabled, 1); + __get_cpu_var(pmcs_enabled) = 1; if (ppc_md.enable_pmcs) ppc_md.enable_pmcs(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 4769e5b7f905..368ab374d33c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -458,9 +458,9 @@ static inline void clear_irq_work_pending(void) DEFINE_PER_CPU(u8, irq_work_pending); -#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1) -#define test_irq_work_pending() __this_cpu_read(irq_work_pending) -#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) +#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 +#define test_irq_work_pending() __get_cpu_var(irq_work_pending) +#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 #endif /* 32 vs 64 bit */ @@ -482,8 +482,8 @@ void arch_irq_work_raise(void) void __timer_interrupt(void) { struct pt_regs *regs = get_irq_regs(); - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - struct clock_event_device *evt = this_cpu_ptr(&decrementers); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + struct clock_event_device *evt = &__get_cpu_var(decrementers); u64 now; trace_timer_interrupt_entry(regs); @@ -498,7 +498,7 @@ void __timer_interrupt(void) *next_tb = ~(u64)0; if (evt->event_handler) evt->event_handler(evt); - __this_cpu_inc(irq_stat.timer_irqs_event); + __get_cpu_var(irq_stat).timer_irqs_event++; } else { now = *next_tb - now; if (now <= DECREMENTER_MAX) @@ -506,13 +506,13 @@ void __timer_interrupt(void) /* We may have raced with new irq work */ if (test_irq_work_pending()) set_dec(1); - __this_cpu_inc(irq_stat.timer_irqs_others); + 
__get_cpu_var(irq_stat).timer_irqs_others++; } #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); + struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } #endif @@ -527,7 +527,7 @@ void __timer_interrupt(void) void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. @@ -813,7 +813,7 @@ static void __init clocksource_init(void) static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __this_cpu_write(decrementers_next_tb, get_tb_or_rtc() + evt); + __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; set_dec(evt); /* We may have raced with new irq work */ @@ -833,7 +833,7 @@ static void decrementer_set_mode(enum clock_event_mode mode, /* Interrupt handler for the timer broadcast IPI */ void tick_broadcast_ipi_handler(void) { - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); *next_tb = get_tb_or_rtc(); __timer_interrupt(); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index e6595b72269b..0dc43f9932cf 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -295,7 +295,7 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; - __this_cpu_inc(irq_stat.mce_exceptions); + __get_cpu_var(irq_stat).mce_exceptions++; if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); @@ -304,7 +304,7 @@ long machine_check_early(struct pt_regs *regs) long hmi_exception_realmode(struct pt_regs *regs) { - __this_cpu_inc(irq_stat.hmi_exceptions); + 
__get_cpu_var(irq_stat).hmi_exceptions++; if (ppc_md.hmi_exception_early) ppc_md.hmi_exception_early(regs); @@ -700,7 +700,7 @@ void machine_check_exception(struct pt_regs *regs) enum ctx_state prev_state = exception_enter(); int recover = 0; - __this_cpu_inc(irq_stat.mce_exceptions); + __get_cpu_var(irq_stat).mce_exceptions++; /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU @@ -1519,7 +1519,7 @@ void vsx_unavailable_tm(struct pt_regs *regs) void performance_monitor_exception(struct pt_regs *regs) { - __this_cpu_inc(irq_stat.pmu_irqs); + __get_cpu_var(irq_stat).pmu_irqs++; perf_irq(regs); } diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 16095841afe1..2e02ed849f36 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -76,11 +76,11 @@ static inline int local_sid_setup_one(struct id *entry) unsigned long sid; int ret = -1; - sid = __this_cpu_inc_return(pcpu_last_used_sid); + sid = ++(__get_cpu_var(pcpu_last_used_sid)); if (sid < NUM_TIDS) { - __this_cpu_write(pcpu_sids)entry[sid], entry); + __get_cpu_var(pcpu_sids).entry[sid] = entry; entry->val = sid; - entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]); + entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; ret = sid; } @@ -108,8 +108,8 @@ static inline int local_sid_setup_one(struct id *entry) static inline int local_sid_lookup(struct id *entry) { if (entry && entry->val != 0 && - __this_cpu_read(pcpu_sids.entry[entry->val]) == entry && - entry->pentry == this_cpu_ptr(&pcpu_sids.entry[entry->val])) + __get_cpu_var(pcpu_sids).entry[entry->val] == entry && + entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) return entry->val; return -1; } @@ -117,8 +117,8 @@ static inline int local_sid_lookup(struct id *entry) /* Invalidate all id mappings on local core -- call with preempt disabled */ static inline void local_sid_destroy_all(void) { - __this_cpu_write(pcpu_last_used_sid, 0); - 
memset(this_cpu_ptr(&pcpu_sids), 0, sizeof(pcpu_sids)); + __get_cpu_var(pcpu_last_used_sid) = 0; + memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); } static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 6ef54e523f33..164bad2a19bf 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -141,9 +141,9 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __this_cpu_read(last_vcpu_of_lpid[vcpu->kvm->arch.lpid]) != vcpu) { + __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __this_cpu_write(last_vcpu_of_lpid[vcpu->kvm->arch.lpid], vcpu); + __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; } kvmppc_load_guest_fp(vcpu); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 504a16f1a1a0..afc0a8295f84 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -625,7 +625,7 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long want_v; unsigned long flags; real_pte_t pte; - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); unsigned long psize = batch->psize; int ssize = batch->ssize; int i; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 060d51fda35e..daee7f4e5a14 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1314,7 +1314,7 @@ void flush_hash_range(unsigned long number, int local) else { int i; struct ppc64_tlb_batch *batch = - this_cpu_ptr(&ppc64_tlb_batch); + &__get_cpu_var(ppc64_tlb_batch); for (i = 0; i < number; i++) flush_hash_page(batch->vpn[i], batch->pte[i], diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c 
b/arch/powerpc/mm/hugetlbpage-book3e.c index ba47aaf33a4b..5e4ee2573903 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -33,13 +33,13 @@ static inline int tlb1_next(void) ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - index = this_cpu_read(next_tlbcam_idx); + index = __get_cpu_var(next_tlbcam_idx); /* Just round-robin the entries and wrap when we hit the end */ if (unlikely(index == ncams - 1)) - __this_cpu_write(next_tlbcam_idx, tlbcam_index); + __get_cpu_var(next_tlbcam_idx) = tlbcam_index; else - __this_cpu_inc(next_tlbcam_idx); + __get_cpu_var(next_tlbcam_idx)++; return index; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8aa04f03fd31..7e70ae968e5f 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -462,7 +462,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) { struct hugepd_freelist **batchp; - batchp = this_cpu_ptr(&hugepd_freelist_cur); + batchp = &get_cpu_var(hugepd_freelist_cur); if (atomic_read(&tlb->mm->mm_users) < 2 || cpumask_equal(mm_cpumask(tlb->mm), diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 690f9c7bf3c8..b7cd00b0171e 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -339,7 +339,7 @@ static void power_pmu_bhrb_reset(void) static void power_pmu_bhrb_enable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -354,7 +354,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event) static void power_pmu_bhrb_disable(struct perf_event *event) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); if (!ppmu->bhrb_nr) return; @@ -1144,7 +1144,7 @@ static void power_pmu_disable(struct pmu *pmu) if (!ppmu) return; local_irq_save(flags); - cpuhw 
= this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); if (!cpuhw->disabled) { /* @@ -1211,7 +1211,7 @@ static void power_pmu_enable(struct pmu *pmu) return; local_irq_save(flags); - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -1403,7 +1403,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) * Add the event to the list (if there is room) * and check whether the total set is still feasible. */ - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); n0 = cpuhw->n_events; if (n0 >= ppmu->n_counter) goto out; @@ -1469,7 +1469,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) power_pmu_read(event); - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); for (i = 0; i < cpuhw->n_events; ++i) { if (event == cpuhw->event[i]) { while (++i < cpuhw->n_events) { @@ -1575,7 +1575,7 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) */ void power_pmu_start_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; @@ -1589,7 +1589,7 @@ void power_pmu_start_txn(struct pmu *pmu) */ void power_pmu_cancel_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); @@ -1607,7 +1607,7 @@ int power_pmu_commit_txn(struct pmu *pmu) if (!ppmu) return -EAGAIN; - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; @@ -1964,7 +1964,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { struct cpu_hw_events 
*cpuhw; - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); power_pmu_bhrb_read(cpuhw); data.br_stack = &cpuhw->bhrb_stack; } @@ -2037,7 +2037,7 @@ static bool pmc_overflow(unsigned long val) static void perf_event_interrupt(struct pt_regs *regs) { int i, j; - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); struct perf_event *event; unsigned long val[8]; int found, active; diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index 4acaea01fe03..d35ae52c69dc 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -210,7 +210,7 @@ static void fsl_emb_pmu_disable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); if (!cpuhw->disabled) { cpuhw->disabled = 1; @@ -249,7 +249,7 @@ static void fsl_emb_pmu_enable(struct pmu *pmu) unsigned long flags; local_irq_save(flags); - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = &__get_cpu_var(cpu_hw_events); if (!cpuhw->disabled) goto out; @@ -653,7 +653,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, static void perf_event_interrupt(struct pt_regs *regs) { int i; - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); struct perf_event *event; unsigned long val; int found = 0; diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 4c11421847be..8a106b4172e0 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -82,7 +82,7 @@ static void iic_unmask(struct irq_data *d) static void iic_eoi(struct irq_data *d) { - struct iic *iic = this_cpu_ptr(&cpu_iic); + struct iic *iic = &__get_cpu_var(cpu_iic); out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]); BUG_ON(iic->eoi_ptr < 0); } @@ 
-148,7 +148,7 @@ static unsigned int iic_get_irq(void) struct iic *iic; unsigned int virq; - iic = this_cpu_ptr(&cpu_iic); + iic = &__get_cpu_var(cpu_iic); *(unsigned long *) &pending = in_be64((u64 __iomem *) &iic->regs->pending_destr); if (!(pending.flags & CBE_IIC_IRQ_VALID)) @@ -163,7 +163,7 @@ static unsigned int iic_get_irq(void) void iic_setup_cpu(void) { - out_be64(this_cpu_ptr(&cpu_iic.regs->prio), 0xff); + out_be64(&__get_cpu_var(cpu_iic).regs->prio, 0xff); } u8 iic_get_target_id(int cpu) diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index 9527e2a7c541..d8a000a9988b 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -48,7 +48,7 @@ void __trace_opal_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = this_cpu_ptr(&opal_trace_depth); + depth = &__get_cpu_var(opal_trace_depth); if (*depth) goto out; @@ -69,7 +69,7 @@ void __trace_opal_exit(long opcode, unsigned long retval) local_irq_save(flags); - depth = this_cpu_ptr(&opal_trace_depth); + depth = &__get_cpu_var(opal_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index a6c42f34303a..5f3b23220b8e 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -711,7 +711,7 @@ void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq) static unsigned int ps3_get_irq(void) { - struct ps3_private *pd = this_cpu_ptr(&ps3_private); + struct ps3_private *pd = &__get_cpu_var(ps3_private); u64 x = (pd->bmp.status & pd->bmp.mask); unsigned int plug; diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 39049e4884fb..1062f71f5a85 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -75,7 +75,7 @@ static atomic_t dtl_count; */ static void 
consume_dtle(struct dtl_entry *dtle, u64 index) { - struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings); + struct dtl_ring *dtlr = &__get_cpu_var(dtl_rings); struct dtl_entry *wp = dtlr->write_ptr; struct lppaca *vpa = local_paca->lppaca_ptr; diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index f02ec3ab428c..4575f0c9e521 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -110,7 +110,7 @@ static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = this_cpu_ptr(&hcall_stats[opcode / 4]); + h = &__get_cpu_var(hcall_stats)[opcode / 4]; h->tb_start = mftb(); h->purr_start = mfspr(SPRN_PURR); } @@ -123,7 +123,7 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, unsigned long if (opcode > MAX_HCALL_OPCODE) return; - h = this_cpu_ptr(&hcall_stats[opcode / 4]); + h = &__get_cpu_var(hcall_stats)[opcode / 4]; h->num_calls++; h->tb_total += mftb() - h->tb_start; h->purr_total += mfspr(SPRN_PURR) - h->purr_start; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 8c355ed4291e..4642d6a4d356 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -200,7 +200,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, local_irq_save(flags); /* to protect tcep and the page behind it */ - tcep = __this_cpu_read(tce_page); + tcep = __get_cpu_var(tce_page); /* This is safe to do since interrupts are off when we're called * from iommu_alloc{,_sg}() @@ -213,7 +213,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction, attrs); } - __this_cpu_write(tce_page, tcep); + __get_cpu_var(tce_page) = tcep; } rpn = __pa(uaddr) >> TCE_SHIFT; @@ -399,7 +399,7 @@ static int 
tce_setrange_multi_pSeriesLP(unsigned long start_pfn, long l, limit; local_irq_disable(); /* to protect tcep and the page behind it */ - tcep = __this_cpu_read(tce_page); + tcep = __get_cpu_var(tce_page); if (!tcep) { tcep = (__be64 *)__get_free_page(GFP_ATOMIC); @@ -407,7 +407,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, local_irq_enable(); return -ENOMEM; } - __this_cpu_write(tce_page, tcep); + __get_cpu_var(tce_page) = tcep; } proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 56df72da59fe..34e64237fff9 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -507,7 +507,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) unsigned long vpn; unsigned long i, pix, rc; unsigned long flags = 0; - struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); + struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[9]; unsigned long hash, index, shift, hidx, slot; @@ -697,7 +697,7 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args) local_irq_save(flags); - depth = this_cpu_ptr(&hcall_trace_depth); + depth = &__get_cpu_var(hcall_trace_depth); if (*depth) goto out; @@ -722,7 +722,7 @@ void __trace_hcall_exit(long opcode, unsigned long retval, local_irq_save(flags); - depth = this_cpu_ptr(&hcall_trace_depth); + depth = &__get_cpu_var(hcall_trace_depth); if (*depth) goto out; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 179a69fd5568..dff05b9eb946 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -302,8 +302,8 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; if 
(!rtas_error_extended(h)) { - memcpy(this_cpu_ptr(&mce_data_buf), h, sizeof(__u64)); - errhdr = (struct rtas_error_log *)this_cpu_ptr(&mce_data_buf); + memcpy(&__get_cpu_var(mce_data_buf), h, sizeof(__u64)); + errhdr = (struct rtas_error_log *)&__get_cpu_var(mce_data_buf); } else { int len, error_log_length; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 365249cd346b..fe0cca477164 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -155,7 +155,7 @@ int __init xics_smp_probe(void) void xics_teardown_cpu(void) { - struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr); + struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); /* * we have to reset the cppr index to 0 because we're -- GitLab From 13a7d299dbbcd4c76ff088ec240d7cd896174c2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 24 Aug 2014 14:52:46 +0200 Subject: [PATCH 0286/1868] drm/radeon: move the IB test after the AGP fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we won't test if the fallback to PCIe GART really worked. 
Signed-off-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index a5d202a7c0a4..b6aee40e6ef3 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1397,10 +1397,6 @@ int radeon_device_init(struct radeon_device *rdev, if (r) return r; - r = radeon_ib_ring_tests(rdev); - if (r) - DRM_ERROR("ib ring test failed (%d).\n", r); - r = radeon_gem_debugfs_init(rdev); if (r) { DRM_ERROR("registering gem debugfs failed (%d).\n", r); @@ -1418,6 +1414,10 @@ int radeon_device_init(struct radeon_device *rdev, return r; } + r = radeon_ib_ring_tests(rdev); + if (r) + DRM_ERROR("ib ring test failed (%d).\n", r); + if ((radeon_testing & 1)) { if (rdev->accel_working) radeon_test_moves(rdev); -- GitLab From b6a7eeeaa1cdf76f2522b75a2fd46280e8c3b3d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 16 Apr 2013 15:41:25 +0200 Subject: [PATCH 0287/1868] drm/radeon: force UVD buffers into VRAM on RS[78]80 v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: only necessary on RS[78]80 Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_cs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 0669399efcea..db739bd64f16 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -137,10 +137,13 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) + !!r->write_domain; /* the first reloc of an UVD job is the msg and that must be in - VRAM, also but everything into VRAM on AGP cards to avoid - image corruptions */ + VRAM, also but everything into VRAM on AGP cards and older + 
IGP chips to avoid image corruptions */ if (p->ring == R600_RING_TYPE_UVD_INDEX && - (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) { + (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) || + p->rdev->family == CHIP_RS780 || + p->rdev->family == CHIP_RS880)) { + /* TODO: is this still needed for NI+ ? */ p->relocs[i].prefered_domains = RADEON_GEM_DOMAIN_VRAM; -- GitLab From a8fba64ab08cf4a5baf211f1126b475e03f90fc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 25 Apr 2013 18:54:07 +0200 Subject: [PATCH 0288/1868] drm/radeon: properly init UVD MC bits on R600 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600.c | 4 ++++ drivers/gpu/drm/radeon/r600d.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e8bf0ea2dade..e7dca47b7196 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -992,6 +992,8 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev) WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); @@ -1042,6 +1044,8 @@ static void r600_pcie_gart_disable(struct radeon_device *rdev) WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp); radeon_gart_table_vram_unpin(rdev); } diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 
0c4a7d8d93e0..3df030dc2352 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -334,7 +334,7 @@ #define MC_VM_AGP_BOT 0x2188 #define MC_VM_AGP_BASE 0x218C #define MC_VM_FB_LOCATION 0x2180 -#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C +#define MC_VM_L1_TLB_MCB_RD_UVD_CNTL 0x2124 #define ENABLE_L1_TLB (1 << 0) #define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1) #define ENABLE_L1_STRICT_ORDERING (1 << 2) @@ -354,12 +354,14 @@ #define EFFECTIVE_L1_QUEUE_SIZE(x) (((x) & 7) << 15) #define EFFECTIVE_L1_QUEUE_SIZE_MASK 0x00038000 #define EFFECTIVE_L1_QUEUE_SIZE_SHIFT 15 +#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C #define MC_VM_L1_TLB_MCD_RD_B_CNTL 0x21A0 #define MC_VM_L1_TLB_MCB_RD_GFX_CNTL 0x21FC #define MC_VM_L1_TLB_MCB_RD_HDP_CNTL 0x2204 #define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL 0x2208 #define MC_VM_L1_TLB_MCB_RD_SEM_CNTL 0x220C #define MC_VM_L1_TLB_MCB_RD_SYS_CNTL 0x2200 +#define MC_VM_L1_TLB_MCB_WR_UVD_CNTL 0x212c #define MC_VM_L1_TLB_MCD_WR_A_CNTL 0x21A4 #define MC_VM_L1_TLB_MCD_WR_B_CNTL 0x21A8 #define MC_VM_L1_TLB_MCB_WR_GFX_CNTL 0x2210 -- GitLab From 4a956a70a8d4cc5268a60f6718de58892fa1275e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Nov 2012 16:55:21 -0500 Subject: [PATCH 0289/1868] drm/radeon: add set_uvd_clocks callback for r6xx v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: wake up PLL, set [VD]CLK_SRC, cleanup code v3: handle RV670,RV635,RV620 as well v4: merge rv6xx and rs780/rs880 code, fix ref divider mask Signed-off-by: Alex Deucher Signed-off-by: Christian König --- drivers/gpu/drm/radeon/r600.c | 88 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/r600d.h | 26 ++++++++++ 2 files changed, 114 insertions(+) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e7dca47b7196..011d97f6fc7f 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -122,6 +122,94 @@ u32 r600_get_xclk(struct radeon_device *rdev) int 
r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) { + unsigned fb_div = 0, ref_div, vclk_div = 0, dclk_div = 0; + int r; + + /* bypass vclk and dclk with bclk */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1), + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK)); + + /* assert BYPASS_EN, deassert UPLL_RESET, UPLL_SLEEP and UPLL_CTLREQ */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~( + UPLL_RESET_MASK | UPLL_SLEEP_MASK | UPLL_CTLREQ_MASK)); + + if (rdev->family >= CHIP_RS780) + WREG32_P(GFX_MACRO_BYPASS_CNTL, UPLL_BYPASS_CNTL, + ~UPLL_BYPASS_CNTL); + + if (!vclk || !dclk) { + /* keep the Bypass mode, put PLL to sleep */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK); + return 0; + } + + if (rdev->clock.spll.reference_freq == 10000) + ref_div = 34; + else + ref_div = 4; + + r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 50000, 160000, + ref_div + 1, 0xFFF, 2, 30, ~0, + &fb_div, &vclk_div, &dclk_div); + if (r) + return r; + + if (rdev->family >= CHIP_RV670 && rdev->family < CHIP_RS780) + fb_div >>= 1; + else + fb_div |= 1; + + r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL); + if (r) + return r; + + /* assert PLL_RESET */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK); + + /* For RS780 we have to choose ref clk */ + if (rdev->family >= CHIP_RS780) + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_REFCLK_SRC_SEL_MASK, + ~UPLL_REFCLK_SRC_SEL_MASK); + + /* set the required fb, ref and post divder values */ + WREG32_P(CG_UPLL_FUNC_CNTL, + UPLL_FB_DIV(fb_div) | + UPLL_REF_DIV(ref_div), + ~(UPLL_FB_DIV_MASK | UPLL_REF_DIV_MASK)); + WREG32_P(CG_UPLL_FUNC_CNTL_2, + UPLL_SW_HILEN(vclk_div >> 1) | + UPLL_SW_LOLEN((vclk_div >> 1) + (vclk_div & 1)) | + UPLL_SW_HILEN2(dclk_div >> 1) | + UPLL_SW_LOLEN2((dclk_div >> 1) + (dclk_div & 1)) | + UPLL_DIVEN_MASK | UPLL_DIVEN2_MASK, + ~UPLL_SW_MASK); + + /* give the PLL some time to settle */ + mdelay(15); + + /* deassert PLL_RESET */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, 
~UPLL_RESET_MASK); + + mdelay(15); + + /* deassert BYPASS EN */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK); + + if (rdev->family >= CHIP_RS780) + WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~UPLL_BYPASS_CNTL); + + r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL); + if (r) + return r; + + /* switch VCLK and DCLK selection */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2), + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK)); + + mdelay(100); + return 0; } diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 3df030dc2352..8c3fdd581a72 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -1526,9 +1526,35 @@ #define UVD_CONTEXT_ID 0xf6f4 +/* rs780 only */ +#define GFX_MACRO_BYPASS_CNTL 0x30c0 +#define SPLL_BYPASS_CNTL (1 << 0) +#define UPLL_BYPASS_CNTL (1 << 1) + +#define CG_UPLL_FUNC_CNTL 0x7e0 +# define UPLL_RESET_MASK 0x00000001 +# define UPLL_SLEEP_MASK 0x00000002 +# define UPLL_BYPASS_EN_MASK 0x00000004 # define UPLL_CTLREQ_MASK 0x00000008 +# define UPLL_FB_DIV(x) ((x) << 4) +# define UPLL_FB_DIV_MASK 0x0000FFF0 +# define UPLL_REF_DIV(x) ((x) << 16) +# define UPLL_REF_DIV_MASK 0x003F0000 +# define UPLL_REFCLK_SRC_SEL_MASK 0x20000000 # define UPLL_CTLACK_MASK 0x40000000 # define UPLL_CTLACK2_MASK 0x80000000 +#define CG_UPLL_FUNC_CNTL_2 0x7e4 +# define UPLL_SW_HILEN(x) ((x) << 0) +# define UPLL_SW_LOLEN(x) ((x) << 4) +# define UPLL_SW_HILEN2(x) ((x) << 8) +# define UPLL_SW_LOLEN2(x) ((x) << 12) +# define UPLL_DIVEN_MASK 0x00010000 +# define UPLL_DIVEN2_MASK 0x00020000 +# define UPLL_SW_MASK 0x0003FFFF +# define VCLK_SRC_SEL(x) ((x) << 20) +# define VCLK_SRC_SEL_MASK 0x01F00000 +# define DCLK_SRC_SEL(x) ((x) << 25) +# define DCLK_SRC_SEL_MASK 0x3E000000 /* * PM4 -- GitLab From 856754c3a23a622d26a82b29fab6429481705511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 16 Apr 2013 22:11:22 +0200 Subject: [PATCH 0290/1868] drm/radeon: add UVD support for older 
asics v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: cleanup R600 support v3: rebased on current drm-fixes-3.12 v4: rebased on drm-next-3.14 Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600.c | 31 +++++++++++ drivers/gpu/drm/radeon/r600d.h | 8 +++ drivers/gpu/drm/radeon/radeon_asic.c | 15 ++++++ drivers/gpu/drm/radeon/radeon_asic.h | 3 ++ drivers/gpu/drm/radeon/uvd_v1_0.c | 77 ++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/uvd_v2_2.c | 4 ++ 6 files changed, 138 insertions(+) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 011d97f6fc7f..14cb31e25c2f 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3009,6 +3009,17 @@ static int r600_startup(struct radeon_device *rdev) return r; } + r = uvd_v1_0_resume(rdev); + if (!r) { + r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r); + } + } + if (r) { + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + } + /* Enable IRQ */ if (!rdev->irq.installed) { r = radeon_irq_kms_init(rdev); @@ -3037,6 +3048,16 @@ static int r600_startup(struct radeon_device *rdev) if (r) return r; + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + RADEON_CP_PACKET2); + if (!r) + r = uvd_v1_0_init(rdev); + if (r) + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); + } + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -3096,6 +3117,8 @@ int r600_suspend(struct radeon_device *rdev) radeon_pm_suspend(rdev); r600_audio_fini(rdev); r600_cp_stop(rdev); + uvd_v1_0_fini(rdev); + radeon_uvd_suspend(rdev); r600_irq_suspend(rdev); radeon_wb_disable(rdev); r600_pcie_gart_disable(rdev); @@ -3175,6 +3198,12 @@ int r600_init(struct radeon_device *rdev) 
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); + r = radeon_uvd_init(rdev); + if (!r) { + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096); + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -3204,6 +3233,8 @@ void r600_fini(struct radeon_device *rdev) r600_audio_fini(rdev); r600_cp_fini(rdev); r600_irq_fini(rdev); + uvd_v1_0_fini(rdev); + radeon_uvd_fini(rdev); radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 8c3fdd581a72..420bed19e139 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -1485,6 +1485,7 @@ #define UVD_CGC_GATE 0xf4a8 #define UVD_LMI_CTRL2 0xf4f4 #define UVD_MASTINT_EN 0xf500 +#define UVD_FW_START 0xf51C #define UVD_LMI_ADDR_EXT 0xf594 #define UVD_LMI_CTRL 0xf598 #define UVD_LMI_SWAP_CNTL 0xf5b4 @@ -1497,6 +1498,13 @@ #define UVD_MPC_SET_MUX 0xf5f4 #define UVD_MPC_SET_ALU 0xf5f8 +#define UVD_VCPU_CACHE_OFFSET0 0xf608 +#define UVD_VCPU_CACHE_SIZE0 0xf60c +#define UVD_VCPU_CACHE_OFFSET1 0xf610 +#define UVD_VCPU_CACHE_SIZE1 0xf614 +#define UVD_VCPU_CACHE_OFFSET2 0xf618 +#define UVD_VCPU_CACHE_SIZE2 0xf61c + #define UVD_VCPU_CNTL 0xf660 #define UVD_SOFT_RESET 0xf680 #define RBC_SOFT_RESET (1<<0) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index eeeeabe09758..9e6699a9a0b4 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -965,6 +965,19 @@ static struct radeon_asic r600_asic = { }, }; +static struct radeon_asic_ring rv6xx_uvd_ring = { + .ib_execute = &uvd_v1_0_ib_execute, + .emit_fence = &uvd_v1_0_fence_emit, + .emit_semaphore = &uvd_v1_0_semaphore_emit, + .cs_parse = &radeon_uvd_cs_parse, + .ring_test = &uvd_v1_0_ring_test, + .ib_test = 
&uvd_v1_0_ib_test, + .is_lockup = &radeon_ring_test_lockup, + .get_rptr = &uvd_v1_0_get_rptr, + .get_wptr = &uvd_v1_0_get_wptr, + .set_wptr = &uvd_v1_0_set_wptr, +}; + static struct radeon_asic rv6xx_asic = { .init = &r600_init, .fini = &r600_fini, @@ -984,6 +997,7 @@ static struct radeon_asic rv6xx_asic = { .ring = { [RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring, [R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &rv6xx_uvd_ring, }, .irq = { .set = &r600_irq_set, @@ -1074,6 +1088,7 @@ static struct radeon_asic rs780_asic = { .ring = { [RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring, [R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &rv6xx_uvd_ring, }, .irq = { .set = &r600_irq_set, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 275a5dc01780..987a3b713e06 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -883,6 +883,7 @@ uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void uvd_v1_0_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); +int uvd_v1_0_resume(struct radeon_device *rdev); int uvd_v1_0_init(struct radeon_device *rdev); void uvd_v1_0_fini(struct radeon_device *rdev); @@ -890,6 +891,8 @@ int uvd_v1_0_start(struct radeon_device *rdev); void uvd_v1_0_stop(struct radeon_device *rdev); int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); +void uvd_v1_0_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence); int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index cda391347286..62d7086f0e08 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -22,6 +22,7 @@ * Authors: Christian König */ +#include #include #include 
"radeon.h" #include "radeon_asic.h" @@ -69,6 +70,82 @@ void uvd_v1_0_set_wptr(struct radeon_device *rdev, WREG32(UVD_RBC_RB_WPTR, ring->wptr); } +/** + * uvd_v1_0_fence_emit - emit an fence & trap command + * + * @rdev: radeon_device pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +void uvd_v1_0_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr; + + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); + radeon_ring_write(ring, 2); + return; +} + +/** + * uvd_v1_0_resume - memory controller programming + * + * @rdev: radeon_device pointer + * + * Let the UVD memory controller know it's offsets + */ +int uvd_v1_0_resume(struct radeon_device *rdev) +{ + uint64_t addr; + uint32_t size; + int r; + + r = radeon_uvd_resume(rdev); + if (r) + return r; + + /* programm the VCPU memory controller bits 0-27 */ + addr = (rdev->uvd.gpu_addr >> 3) + 16; + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size) >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET0, addr); + WREG32(UVD_VCPU_CACHE_SIZE0, size); + + addr += size; + size = RADEON_UVD_STACK_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET1, addr); + WREG32(UVD_VCPU_CACHE_SIZE1, size); + + addr += size; + size = RADEON_UVD_HEAP_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET2, addr); + WREG32(UVD_VCPU_CACHE_SIZE2, size); + + /* bits 28-31 */ + addr = (rdev->uvd.gpu_addr >> 28) & 0xF; + 
WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); + + /* bits 32-39 */ + addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; + WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); + + WREG32(UVD_FW_START, *((uint32_t*)rdev->uvd.cpu_addr)); + + return 0; +} + /** * uvd_v1_0_init - start and test UVD block * diff --git a/drivers/gpu/drm/radeon/uvd_v2_2.c b/drivers/gpu/drm/radeon/uvd_v2_2.c index 8bfdadd56598..89193519f8a1 100644 --- a/drivers/gpu/drm/radeon/uvd_v2_2.c +++ b/drivers/gpu/drm/radeon/uvd_v2_2.c @@ -72,6 +72,10 @@ int uvd_v2_2_resume(struct radeon_device *rdev) uint32_t chip_id, size; int r; + /* RV770 uses V1.0 MC */ + if (rdev->family == CHIP_RV770) + return uvd_v1_0_resume(rdev); + r = radeon_uvd_resume(rdev); if (r) return r; -- GitLab From 115365e8a33e4ce5e12bee7999568a26b4c33d3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 25 Apr 2013 09:02:14 +0200 Subject: [PATCH 0291/1868] drm/radeon: implement UVD hw workarounds for R6xx v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only the essentials, cause this hw generation is really buggy. 
v2: start supporting RV670,RV620 and RV635 as well v3: activate more workarounds Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600d.h | 3 +++ drivers/gpu/drm/radeon/uvd_v1_0.c | 26 +++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 420bed19e139..671b48032a3d 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -330,6 +330,7 @@ #define HDP_TILING_CONFIG 0x2F3C #define HDP_DEBUG1 0x2F34 +#define MC_CONFIG 0x2000 #define MC_VM_AGP_TOP 0x2184 #define MC_VM_AGP_BOT 0x2188 #define MC_VM_AGP_BASE 0x218C @@ -375,6 +376,8 @@ #define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2194 #define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198 +#define RS_DQ_RD_RET_CONF 0x2348 + #define PA_CL_ENHANCE 0x8A14 #define CLIP_VTX_REORDER_ENA (1 << 0) #define NUM_CLIP_SEQ(x) ((x) << 1) diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index 62d7086f0e08..c3e182bc6c59 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -207,8 +207,32 @@ int uvd_v1_0_init(struct radeon_device *rdev) /* lower clocks again */ radeon_set_uvd_clocks(rdev, 0, 0); - if (!r) + if (!r) { + switch (rdev->family) { + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV620: + /* 64byte granularity workaround */ + WREG32(MC_CONFIG, 0); + WREG32(MC_CONFIG, 1 << 4); + WREG32(RS_DQ_RD_RET_CONF, 0x3f); + WREG32(MC_CONFIG, 0x1f); + + /* fall through */ + case CHIP_RV670: + case CHIP_RV635: + + /* write clean workaround */ + WREG32_P(UVD_VCPU_CNTL, 0x10, ~0x10); + break; + + default: + /* TODO: Do we need more? 
*/ + break; + } + DRM_INFO("UVD initialized successfully.\n"); + } return r; } -- GitLab From bdc99722d007ed1db7188b09404bda080d1d737a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 26 Aug 2014 13:11:36 -0400 Subject: [PATCH 0292/1868] drm/radeon: 760G/780V/880V don't have UVD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't enable UVD on these asics as they don't have UVD hardware. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600.c | 57 ++++++++++++++++------------ drivers/gpu/drm/radeon/radeon_asic.c | 10 ++++- 2 files changed, 42 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 14cb31e25c2f..a95ced569d84 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3009,15 +3009,16 @@ static int r600_startup(struct radeon_device *rdev) return r; } - r = uvd_v1_0_resume(rdev); - if (!r) { - r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); - if (r) { - dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r); + if (rdev->has_uvd) { + r = uvd_v1_0_resume(rdev); + if (!r) { + r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r); + } } - } - if (r) { - rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + if (r) + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; } /* Enable IRQ */ @@ -3048,14 +3049,16 @@ static int r600_startup(struct radeon_device *rdev) if (r) return r; - ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; - if (ring->ring_size) { - r = radeon_ring_init(rdev, ring, ring->ring_size, 0, - RADEON_CP_PACKET2); - if (!r) - r = uvd_v1_0_init(rdev); - if (r) - DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); + if (rdev->has_uvd) { + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + 
RADEON_CP_PACKET2); + if (!r) + r = uvd_v1_0_init(rdev); + if (r) + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); + } } r = radeon_ib_pool_init(rdev); @@ -3117,8 +3120,10 @@ int r600_suspend(struct radeon_device *rdev) radeon_pm_suspend(rdev); r600_audio_fini(rdev); r600_cp_stop(rdev); - uvd_v1_0_fini(rdev); - radeon_uvd_suspend(rdev); + if (rdev->has_uvd) { + uvd_v1_0_fini(rdev); + radeon_uvd_suspend(rdev); + } r600_irq_suspend(rdev); radeon_wb_disable(rdev); r600_pcie_gart_disable(rdev); @@ -3198,10 +3203,12 @@ int r600_init(struct radeon_device *rdev) rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); - r = radeon_uvd_init(rdev); - if (!r) { - rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; - r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096); + if (rdev->has_uvd) { + r = radeon_uvd_init(rdev); + if (!r) { + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096); + } } rdev->ih.ring_obj = NULL; @@ -3233,8 +3240,10 @@ void r600_fini(struct radeon_device *rdev) r600_audio_fini(rdev); r600_cp_fini(rdev); r600_irq_fini(rdev); - uvd_v1_0_fini(rdev); - radeon_uvd_fini(rdev); + if (rdev->has_uvd) { + uvd_v1_0_fini(rdev); + radeon_uvd_fini(rdev); + } radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 9e6699a9a0b4..d91f965e8219 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2313,7 +2313,15 @@ int radeon_asic_init(struct radeon_device *rdev) case CHIP_RS780: case CHIP_RS880: rdev->asic = &rs780_asic; - rdev->has_uvd = true; + /* 760G/780V/880V don't have UVD */ + if ((rdev->pdev->device == 0x9616)|| + (rdev->pdev->device == 0x9611)|| + (rdev->pdev->device == 0x9613)|| + (rdev->pdev->device == 0x9711)|| + (rdev->pdev->device == 
0x9713)) + rdev->has_uvd = false; + else + rdev->has_uvd = true; break; case CHIP_RV770: case CHIP_RV730: -- GitLab From 32517d59ebb3b6eb7a5a5736020072ce7e609e76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 09:59:45 +0200 Subject: [PATCH 0293/1868] drm/radeon: enable RB_ARB before resetting the VCPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes "UVD not responding, trying to reset the VCPU" messages on earlier ASICs. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/uvd_v1_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index c3e182bc6c59..e72b3cb59358 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -319,12 +319,12 @@ int uvd_v1_0_start(struct radeon_device *rdev) /* enable UMC */ WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); + /* boot up the VCPU */ WREG32(UVD_SOFT_RESET, 0); mdelay(10); - WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); - for (i = 0; i < 10; ++i) { uint32_t status; for (j = 0; j < 100; ++j) { -- GitLab From 14e935aeb0213e2ef522d0d8a14d2d29fb194956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 16 Apr 2013 22:11:37 +0200 Subject: [PATCH 0294/1868] drm/radeon: add UVD fw names for older asic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Activating the UVD support. 
Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_uvd.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 341848a14376..a6ebaf0bda15 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -40,12 +40,18 @@ #define UVD_IDLE_TIMEOUT_MS 1000 /* Firmware Names */ +#define FIRMWARE_R600 "radeon/R600_uvd.bin" +#define FIRMWARE_RS780 "radeon/RS780_uvd.bin" +#define FIRMWARE_RV770 "radeon/RV770_uvd.bin" #define FIRMWARE_RV710 "radeon/RV710_uvd.bin" #define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" #define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" #define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" #define FIRMWARE_BONAIRE "radeon/BONAIRE_uvd.bin" +MODULE_FIRMWARE(FIRMWARE_R600); +MODULE_FIRMWARE(FIRMWARE_RS780); +MODULE_FIRMWARE(FIRMWARE_RV770); MODULE_FIRMWARE(FIRMWARE_RV710); MODULE_FIRMWARE(FIRMWARE_CYPRESS); MODULE_FIRMWARE(FIRMWARE_SUMO); @@ -63,6 +69,23 @@ int radeon_uvd_init(struct radeon_device *rdev) INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler); switch (rdev->family) { + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + fw_name = FIRMWARE_R600; + break; + + case CHIP_RS780: + case CHIP_RS880: + fw_name = FIRMWARE_RS780; + break; + + case CHIP_RV770: + fw_name = FIRMWARE_RV770; + break; + case CHIP_RV710: case CHIP_RV730: case CHIP_RV740: -- GitLab From f0d970b4fd05cb7af89307bb17689c18c835d739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 15:21:53 +0200 Subject: [PATCH 0295/1868] drm/radeon: wake up all fences on manual reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wake up all fences when we manually trigger a reset. 
Signed-off-by: Christian König Reviewed-by: Maarten Lankhorst Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_fence.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 913787085dfa..5bd837afb939 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -833,6 +833,7 @@ static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data) down_read(&rdev->exclusive_lock); seq_printf(m, "%d\n", rdev->needs_reset); rdev->needs_reset = true; + wake_up_all(&rdev->fence_queue); up_read(&rdev->exclusive_lock); return 0; -- GitLab From eb98c709907c7a78b9cd0d18642477d47d348f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 15:21:56 +0200 Subject: [PATCH 0296/1868] drm/radeon: force fence completion only on problematic rings (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of resetting all fence numbers, only reset the number of the problematic ring. 
Split out from a patch from Maarten Lankhorst v2 (agd5f): rebase build fix Signed-off-by: Christian König Reviewed-by: Maarten Lankhorst Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_device.c | 8 ++------ drivers/gpu/drm/radeon/radeon_fence.c | 12 ++++-------- drivers/gpu/drm/radeon/radeon_ib.c | 1 + 4 files changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b321ad4dcafd..5f967c0b5aa3 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -371,7 +371,7 @@ struct radeon_fence { int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); int radeon_fence_driver_init(struct radeon_device *rdev); void radeon_fence_driver_fini(struct radeon_device *rdev); -void radeon_fence_driver_force_completion(struct radeon_device *rdev); +void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring); int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b6aee40e6ef3..8b442e4ab1da 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1488,7 +1488,6 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) struct drm_crtc *crtc; struct drm_connector *connector; int i, r; - bool force_completion = false; if (dev == NULL || dev->dev_private == NULL) { return -ENODEV; @@ -1532,12 +1531,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) r = radeon_fence_wait_empty(rdev, i); if (r) { /* delay GPU reset to resume */ - force_completion = true; + radeon_fence_driver_force_completion(rdev, i); } } - if (force_completion) { - 
radeon_fence_driver_force_completion(rdev); - } radeon_save_bios_scratch_regs(rdev); @@ -1722,8 +1718,8 @@ int radeon_gpu_reset(struct radeon_device *rdev) } } } else { - radeon_fence_driver_force_completion(rdev); for (i = 0; i < RADEON_NUM_RINGS; ++i) { + radeon_fence_driver_force_completion(rdev, i); kfree(ring_data[i]); } } diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 5bd837afb939..e8a28e7b39c7 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -758,7 +758,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) r = radeon_fence_wait_empty(rdev, ring); if (r) { /* no need to trigger GPU reset as we are unloading */ - radeon_fence_driver_force_completion(rdev); + radeon_fence_driver_force_completion(rdev, ring); } wake_up_all(&rdev->fence_queue); radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); @@ -771,19 +771,15 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) * radeon_fence_driver_force_completion - force all fence waiter to complete * * @rdev: radeon device pointer + * @ring: the ring to complete * * In case of GPU reset failure make sure no process keep waiting on fence * that will never complete. 
*/ -void radeon_fence_driver_force_completion(struct radeon_device *rdev) +void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring) { - int ring; - - for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { - if (!rdev->fence_drv[ring].initialized) - continue; + if (rdev->fence_drv[ring].initialized) radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring); - } } diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 5bf2c0a05827..6fc7461d70c4 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -269,6 +269,7 @@ int radeon_ib_ring_tests(struct radeon_device *rdev) r = radeon_ib_test(rdev, i, ring); if (r) { + radeon_fence_driver_force_completion(rdev, i); ring->ready = false; rdev->needs_reset = false; -- GitLab From 9bb39ff43e15e85bc1bd9bbbdc5b9cef7a670fd5 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 27 Aug 2014 16:45:18 -0400 Subject: [PATCH 0297/1868] drm/radeon: take exclusive_lock in read mode during ring tests, v5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is needed for the next commit, because the lockup detection will need the read lock to run. 
v4 (chk): split out forced fence completion, remove unrelated changes, add and handle in_reset flag v5 (agd5f): rebase fix Signed-off-by: Maarten Lankhorst Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_cs.c | 7 +++++ drivers/gpu/drm/radeon/radeon_device.c | 37 +++++++++++-------------- drivers/gpu/drm/radeon/radeon_display.c | 4 ++- 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5f967c0b5aa3..8cd1b3f60d4a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2326,7 +2326,7 @@ struct radeon_device { bool need_dma32; bool accel_working; bool fastfb_working; /* IGP feature*/ - bool needs_reset; + bool needs_reset, in_reset; struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES]; const struct firmware *me_fw; /* all family ME firmware */ const struct firmware *pfp_fw; /* r6/700 PFP firmware */ diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index db739bd64f16..bd328cb6fa61 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -653,6 +653,13 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) up_read(&rdev->exclusive_lock); return -EBUSY; } + if (rdev->in_reset) { + up_read(&rdev->exclusive_lock); + r = radeon_gpu_reset(rdev); + if (!r) + r = -EAGAIN; + return r; + } /* initialize parser */ memset(&parser, 0, sizeof(struct radeon_cs_parser)); parser.filp = filp; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 8b442e4ab1da..9f666370b5ac 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1673,6 +1673,7 @@ int radeon_gpu_reset(struct radeon_device *rdev) return 0; } + rdev->in_reset = true; rdev->needs_reset = false; radeon_save_bios_scratch_regs(rdev); @@ 
-1691,7 +1692,6 @@ int radeon_gpu_reset(struct radeon_device *rdev) } } -retry: r = radeon_asic_reset(rdev); if (!r) { dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n"); @@ -1700,25 +1700,11 @@ int radeon_gpu_reset(struct radeon_device *rdev) radeon_restore_bios_scratch_regs(rdev); - if (!r) { - for (i = 0; i < RADEON_NUM_RINGS; ++i) { + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (!r && ring_data[i]) { radeon_ring_restore(rdev, &rdev->ring[i], ring_sizes[i], ring_data[i]); - ring_sizes[i] = 0; - ring_data[i] = NULL; - } - - r = radeon_ib_ring_tests(rdev); - if (r) { - dev_err(rdev->dev, "ib ring test failed (%d).\n", r); - if (saved) { - saved = false; - radeon_suspend(rdev); - goto retry; - } - } - } else { - for (i = 0; i < RADEON_NUM_RINGS; ++i) { + } else { radeon_fence_driver_force_completion(rdev, i); kfree(ring_data[i]); } @@ -1751,19 +1737,28 @@ int radeon_gpu_reset(struct radeon_device *rdev) /* reset hpd state */ radeon_hpd_init(rdev); + ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); + downgrade_write(&rdev->exclusive_lock); + drm_helper_resume_force_mode(rdev->ddev); /* set the power state here in case we are a PX system or headless */ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) radeon_pm_compute_clocks(rdev); - ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); - if (r) { + if (!r) { + r = radeon_ib_ring_tests(rdev); + if (r && saved) + r = -EAGAIN; + } else { /* bad news, how to tell it to userspace ? 
*/ dev_info(rdev->dev, "GPU reset failed\n"); } - up_write(&rdev->exclusive_lock); + rdev->needs_reset = r == -EAGAIN; + rdev->in_reset = false; + + up_read(&rdev->exclusive_lock); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 3fdf87318069..bd0d687379ee 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -405,7 +405,9 @@ static void radeon_flip_work_func(struct work_struct *__work) r = radeon_fence_wait(work->fence, false); if (r == -EDEADLK) { up_read(&rdev->exclusive_lock); - r = radeon_gpu_reset(rdev); + do { + r = radeon_gpu_reset(rdev); + } while (r == -EAGAIN); down_read(&rdev->exclusive_lock); } if (r) -- GitLab From 0bfa4b41268ad5fd741f16f484e4fee190822ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 15:21:58 +0200 Subject: [PATCH 0298/1868] drm/radeon: handle lockup in delayed work, v5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v5 (chk): complete rework, start when the first fence is emitted, stop when the last fence is signalled, make it work correctly with GPU resets, cleanup radeon_fence_wait_seq Signed-off-by: Maarten Lankhorst Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 2 + drivers/gpu/drm/radeon/radeon_fence.c | 200 ++++++++++++++++---------- 2 files changed, 124 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8cd1b3f60d4a..74919ef57ac3 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -350,6 +350,7 @@ extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw, * Fences. 
*/ struct radeon_fence_driver { + struct radeon_device *rdev; uint32_t scratch_reg; uint64_t gpu_addr; volatile uint32_t *cpu_addr; @@ -357,6 +358,7 @@ struct radeon_fence_driver { uint64_t sync_seq[RADEON_NUM_RINGS]; atomic64_t last_seq; bool initialized; + struct delayed_work lockup_work; }; struct radeon_fence { diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index e8a28e7b39c7..ac15f3418478 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -97,6 +97,25 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring) return seq; } +/** + * radeon_fence_schedule_check - schedule lockup check + * + * @rdev: radeon_device pointer + * @ring: ring index we should work with + * + * Queues a delayed work item to check for lockups. + */ +static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring) +{ + /* + * Do not reset the timer here with mod_delayed_work, + * this can livelock in an interaction with TTM delayed destroy. + */ + queue_delayed_work(system_power_efficient_wq, + &rdev->fence_drv[ring].lockup_work, + RADEON_FENCE_JIFFIES_TIMEOUT); +} + /** * radeon_fence_emit - emit a fence on the requested ring * @@ -122,19 +141,21 @@ int radeon_fence_emit(struct radeon_device *rdev, (*fence)->ring = ring; radeon_fence_ring_emit(rdev, ring, *fence); trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); + radeon_fence_schedule_check(rdev, ring); return 0; } /** - * radeon_fence_process - process a fence + * radeon_fence_activity - check for fence activity * * @rdev: radeon_device pointer * @ring: ring index the fence is associated with * - * Checks the current fence value and wakes the fence queue - * if the sequence number has increased (all asics). + * Checks the current fence value and calculates the last + * signalled fence value. Returns true if activity occured + * on the ring, and the fence_queue should be waken up. 
*/ -void radeon_fence_process(struct radeon_device *rdev, int ring) +static bool radeon_fence_activity(struct radeon_device *rdev, int ring) { uint64_t seq, last_seq, last_emitted; unsigned count_loop = 0; @@ -190,7 +211,67 @@ void radeon_fence_process(struct radeon_device *rdev, int ring) } } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq); - if (wake) + if (seq < last_emitted) + radeon_fence_schedule_check(rdev, ring); + + return wake; +} + +/** + * radeon_fence_check_lockup - check for hardware lockup + * + * @work: delayed work item + * + * Checks for fence activity and if there is none probe + * the hardware if a lockup occured. + */ +static void radeon_fence_check_lockup(struct work_struct *work) +{ + struct radeon_fence_driver *fence_drv; + struct radeon_device *rdev; + int ring; + + fence_drv = container_of(work, struct radeon_fence_driver, + lockup_work.work); + rdev = fence_drv->rdev; + ring = fence_drv - &rdev->fence_drv[0]; + + if (!down_read_trylock(&rdev->exclusive_lock)) { + /* just reschedule the check if a reset is going on */ + radeon_fence_schedule_check(rdev, ring); + return; + } + + if (radeon_fence_activity(rdev, ring)) + wake_up_all(&rdev->fence_queue); + + else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) { + + /* good news we believe it's a lockup */ + dev_warn(rdev->dev, "GPU lockup (current fence id " + "0x%016llx last fence id 0x%016llx on ring %d)\n", + (uint64_t)atomic64_read(&fence_drv->last_seq), + fence_drv->sync_seq[ring], ring); + + /* remember that we need an reset */ + rdev->needs_reset = true; + wake_up_all(&rdev->fence_queue); + } + up_read(&rdev->exclusive_lock); +} + +/** + * radeon_fence_process - process a fence + * + * @rdev: radeon_device pointer + * @ring: ring index the fence is associated with + * + * Checks the current fence value and wakes the fence queue + * if the sequence number has increased (all asics). 
+ */ +void radeon_fence_process(struct radeon_device *rdev, int ring) +{ + if (radeon_fence_activity(rdev, ring)) wake_up_all(&rdev->fence_queue); } @@ -300,86 +381,43 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, bool intr) { - uint64_t last_seq[RADEON_NUM_RINGS]; - bool signaled; - int i, r; - - while (!radeon_fence_any_seq_signaled(rdev, target_seq)) { + long r; + int i; - /* Save current sequence values, used to check for GPU lockups */ - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (!target_seq[i]) - continue; + if (radeon_fence_any_seq_signaled(rdev, target_seq)) + return 0; - last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq); - trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]); - radeon_irq_kms_sw_irq_get(rdev, i); - } + /* enable IRQs and tracing */ + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (!target_seq[i]) + continue; - if (intr) { - r = wait_event_interruptible_timeout(rdev->fence_queue, ( - (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)) - || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT); - } else { - r = wait_event_timeout(rdev->fence_queue, ( - (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)) - || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT); - } + trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]); + radeon_irq_kms_sw_irq_get(rdev, i); + } - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (!target_seq[i]) - continue; + if (intr) { + r = wait_event_interruptible_timeout(rdev->fence_queue, ( + radeon_fence_any_seq_signaled(rdev, target_seq) + || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT); + } else { + r = wait_event_timeout(rdev->fence_queue, ( + radeon_fence_any_seq_signaled(rdev, target_seq) + || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT); + } - radeon_irq_kms_sw_irq_put(rdev, i); - trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); - } + if (rdev->needs_reset) + r = 
-EDEADLK; - if (unlikely(r < 0)) - return r; + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (!target_seq[i]) + continue; - if (unlikely(!signaled)) { - if (rdev->needs_reset) - return -EDEADLK; - - /* we were interrupted for some reason and fence - * isn't signaled yet, resume waiting */ - if (r) - continue; - - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (!target_seq[i]) - continue; - - if (last_seq[i] != atomic64_read(&rdev->fence_drv[i].last_seq)) - break; - } - - if (i != RADEON_NUM_RINGS) - continue; - - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (!target_seq[i]) - continue; - - if (radeon_ring_is_lockup(rdev, i, &rdev->ring[i])) - break; - } - - if (i < RADEON_NUM_RINGS) { - /* good news we believe it's a lockup */ - dev_warn(rdev->dev, "GPU lockup (waiting for " - "0x%016llx last fence id 0x%016llx on" - " ring %d)\n", - target_seq[i], last_seq[i], i); - - /* remember that we need an reset */ - rdev->needs_reset = true; - wake_up_all(&rdev->fence_queue); - return -EDEADLK; - } - } + radeon_irq_kms_sw_irq_put(rdev, i); + trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); } - return 0; + + return r < 0 ? 
r : 0; } /** @@ -711,6 +749,9 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) rdev->fence_drv[ring].sync_seq[i] = 0; atomic64_set(&rdev->fence_drv[ring].last_seq, 0); rdev->fence_drv[ring].initialized = false; + INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work, + radeon_fence_check_lockup); + rdev->fence_drv[ring].rdev = rdev; } /** @@ -760,6 +801,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) /* no need to trigger GPU reset as we are unloading */ radeon_fence_driver_force_completion(rdev, ring); } + cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work); wake_up_all(&rdev->fence_queue); radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); rdev->fence_drv[ring].initialized = false; @@ -778,8 +820,10 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) */ void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring) { - if (rdev->fence_drv[ring].initialized) + if (rdev->fence_drv[ring].initialized) { radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring); + cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work); + } } -- GitLab From 9867d00dbaef42e346e5d12eaa9591b057fea6d8 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 27 Aug 2014 15:21:59 +0200 Subject: [PATCH 0299/1868] drm/radeon: add timeout argument to radeon_fence_wait_seq v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes it possible to wait for a specific amount of time, rather than wait until infinity. 
v2 (chk): rebased on other changes Signed-off-by: Maarten Lankhorst Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_fence.c | 48 ++++++++++++++++----------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index ac15f3418478..a54bfd60510b 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -364,28 +364,31 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) } /** - * radeon_fence_wait_seq - wait for a specific sequence numbers + * radeon_fence_wait_seq_timeout - wait for a specific sequence numbers * * @rdev: radeon device pointer * @target_seq: sequence number(s) we want to wait for * @intr: use interruptable sleep + * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait * * Wait for the requested sequence number(s) to be written by any ring * (all asics). Sequnce number array is indexed by ring id. * @intr selects whether to use interruptable (true) or non-interruptable * (false) sleep when waiting for the sequence number. Helper function * for radeon_fence_wait_*(). - * Returns 0 if the sequence number has passed, error for all other cases. + * Returns remaining time if the sequence number has passed, 0 when + * the wait times out, or an error for all other cases. * -EDEADLK is returned when a GPU lockup has been detected.
*/ -static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, - bool intr) +static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, + u64 *target_seq, bool intr, + long timeout) { long r; int i; if (radeon_fence_any_seq_signaled(rdev, target_seq)) - return 0; + return timeout; /* enable IRQs and tracing */ for (i = 0; i < RADEON_NUM_RINGS; ++i) { @@ -399,11 +402,11 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, if (intr) { r = wait_event_interruptible_timeout(rdev->fence_queue, ( radeon_fence_any_seq_signaled(rdev, target_seq) - || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT); + || rdev->needs_reset), timeout); } else { r = wait_event_timeout(rdev->fence_queue, ( radeon_fence_any_seq_signaled(rdev, target_seq) - || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT); + || rdev->needs_reset), timeout); } if (rdev->needs_reset) @@ -417,14 +420,14 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); } - return r < 0 ? r : 0; + return r; } /** * radeon_fence_wait - wait for a fence to signal * * @fence: radeon fence object - * @intr: use interruptable sleep + * @intr: use interruptible sleep * * Wait for the requested fence to signal (all asics). 
* @intr selects whether to use interruptable (true) or non-interruptable @@ -434,7 +437,7 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, int radeon_fence_wait(struct radeon_fence *fence, bool intr) { uint64_t seq[RADEON_NUM_RINGS] = {}; - int r; + long r; if (fence == NULL) { WARN(1, "Querying an invalid fence : %p !\n", fence); @@ -445,9 +448,10 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) return 0; - r = radeon_fence_wait_seq(fence->rdev, seq, intr); - if (r) + r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); + if (r < 0) { return r; + } fence->seq = RADEON_FENCE_SIGNALED_SEQ; return 0; @@ -472,7 +476,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, { uint64_t seq[RADEON_NUM_RINGS]; unsigned i, num_rings = 0; - int r; + long r; for (i = 0; i < RADEON_NUM_RINGS; ++i) { seq[i] = 0; @@ -493,8 +497,8 @@ int radeon_fence_wait_any(struct radeon_device *rdev, if (num_rings == 0) return -ENOENT; - r = radeon_fence_wait_seq(rdev, seq, intr); - if (r) { + r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); + if (r < 0) { return r; } return 0; @@ -513,6 +517,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, int radeon_fence_wait_next(struct radeon_device *rdev, int ring) { uint64_t seq[RADEON_NUM_RINGS] = {}; + long r; seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL; if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) { @@ -520,7 +525,10 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring) already the last emited fence */ return -ENOENT; } - return radeon_fence_wait_seq(rdev, seq, false); + r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r; + return 0; } /** @@ -536,18 +544,18 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring) int radeon_fence_wait_empty(struct radeon_device *rdev, int ring) { 
uint64_t seq[RADEON_NUM_RINGS] = {}; - int r; + long r; seq[ring] = rdev->fence_drv[ring].sync_seq[ring]; if (!seq[ring]) return 0; - r = radeon_fence_wait_seq(rdev, seq, false); - if (r) { + r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) { if (r == -EDEADLK) return -EDEADLK; - dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n", + dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n", ring, r); } return 0; -- GitLab From d6d5c5b8364bcc4d52cddc68bcb0a330d2af20f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 15:22:00 +0200 Subject: [PATCH 0300/1868] drm/radeon: drop RADEON_FENCE_SIGNALED_SEQ v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's causing issues with VMID handling and comparing the fence value two times actually doesn't make handling faster. v2: rebased on reset changes Signed-off-by: Christian König Reviewed-by: Maarten Lankhorst Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 3 --- drivers/gpu/drm/radeon/radeon_fence.c | 18 ++---------------- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 74919ef57ac3..896a84779650 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -120,9 +120,6 @@ extern int radeon_bapm; #define RADEONFB_CONN_LIMIT 4 #define RADEON_BIOS_NUM_SCRATCH 8 -/* fence seq are set to this number when signaled */ -#define RADEON_FENCE_SIGNALED_SEQ 0LL - /* internal ring indices */ /* r1xx+ has gfx CP ring */ #define RADEON_RING_TYPE_GFX_INDEX 0 diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index a54bfd60510b..ecdba3afa2c3 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -328,16 +328,10 @@ static bool radeon_fence_seq_signaled(struct radeon_device 
*rdev, */ bool radeon_fence_signaled(struct radeon_fence *fence) { - if (!fence) { - return true; - } - if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) { + if (!fence) return true; - } - if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) { - fence->seq = RADEON_FENCE_SIGNALED_SEQ; + if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) return true; - } return false; } @@ -445,15 +439,11 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) } seq[fence->ring] = fence->seq; - if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) - return 0; - r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); if (r < 0) { return r; } - fence->seq = RADEON_FENCE_SIGNALED_SEQ; return 0; } @@ -487,10 +477,6 @@ int radeon_fence_wait_any(struct radeon_device *rdev, seq[i] = fences[i]->seq; ++num_rings; - - /* test if something was allready signaled */ - if (seq[i] == RADEON_FENCE_SIGNALED_SEQ) - return 0; } /* nothing to wait for ? */ -- GitLab From 3c0363891c0fa5d17b683b758bff0d81fa6a9775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 15:22:01 +0200 Subject: [PATCH 0301/1868] drm/radeon: drop doing resets in a work item MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Blocking completely innocent processes with a GPU reset is a pretty bad idea. Just set needs_reset and let the next command submission or fence wait do the job. 
Signed-off-by: Christian König Reviewed-by: Maarten Lankhorst Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 6 ++++-- drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/radeon_device.c | 7 ++++--- drivers/gpu/drm/radeon/radeon_irq_kms.c | 18 ------------------ 4 files changed, 8 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 79a5a5519bd6..1f598ab3b9a7 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -8246,8 +8246,10 @@ int cik_irq_process(struct radeon_device *rdev) } if (queue_hotplug) schedule_work(&rdev->hotplug_work); - if (queue_reset) - schedule_work(&rdev->reset_work); + if (queue_reset) { + rdev->needs_reset = true; + wake_up_all(&rdev->fence_queue); + } if (queue_thermal) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 896a84779650..c163b4f89149 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2346,7 +2346,6 @@ struct radeon_device { struct radeon_mec mec; struct work_struct hotplug_work; struct work_struct audio_work; - struct work_struct reset_work; int num_crtc; /* number of crtcs */ struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */ bool has_uvd; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 9f666370b5ac..d30f1cc1aa12 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1673,9 +1673,6 @@ int radeon_gpu_reset(struct radeon_device *rdev) return 0; } - rdev->in_reset = true; - rdev->needs_reset = false; - radeon_save_bios_scratch_regs(rdev); /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev); @@ -1738,6 +1735,10 @@ int radeon_gpu_reset(struct radeon_device *rdev) radeon_hpd_init(rdev); ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); + + 
rdev->in_reset = true; + rdev->needs_reset = false; + downgrade_write(&rdev->exclusive_lock); drm_helper_resume_force_mode(rdev->ddev); diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 16807afab362..f0bff4be67f1 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -87,23 +87,6 @@ static void radeon_hotplug_work_func(struct work_struct *work) drm_helper_hpd_irq_event(dev); } -/** - * radeon_irq_reset_work_func - execute gpu reset - * - * @work: work struct - * - * Execute scheduled gpu reset (cayman+). - * This function is called when the irq handler - * thinks we need a gpu reset. - */ -static void radeon_irq_reset_work_func(struct work_struct *work) -{ - struct radeon_device *rdev = container_of(work, struct radeon_device, - reset_work); - - radeon_gpu_reset(rdev); -} - /** * radeon_driver_irq_preinstall_kms - drm irq preinstall callback * @@ -284,7 +267,6 @@ int radeon_irq_kms_init(struct radeon_device *rdev) INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi); - INIT_WORK(&rdev->reset_work, radeon_irq_reset_work_func); rdev->irq.installed = true; r = drm_irq_install(rdev->ddev, rdev->ddev->pdev->irq); -- GitLab From 3852752ca89ca00aa13f12a9b9450fd97ff437d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 21 Aug 2014 12:18:12 +0200 Subject: [PATCH 0302/1868] drm/radeon: allow UVD to use a second 256MB segment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This improves concurrent stream decoding. 
Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 3 ++- drivers/gpu/drm/radeon/radeon_object.c | 5 +++-- drivers/gpu/drm/radeon/radeon_uvd.c | 20 ++++++++++++++++++-- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index a5ac95bf05ea..83a24614138a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1642,7 +1642,8 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence); int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence); -void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo); +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, + uint32_t allowed_domains); void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp); int radeon_uvd_cs_parse(struct radeon_cs_parser *parser); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 0129c7efae3b..c97a42432e2b 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -491,6 +491,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, bo = lobj->robj; if (!bo->pin_count) { u32 domain = lobj->prefered_domains; + u32 allowed = lobj->allowed_domains; u32 current_domain = radeon_mem_type_to_domain(bo->tbo.mem.mem_type); @@ -502,7 +503,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, * into account. We don't want to disallow buffer moves * completely. 
*/ - if ((lobj->allowed_domains & current_domain) != 0 && + if ((allowed & current_domain) != 0 && (domain & current_domain) == 0 && /* will be moved */ bytes_moved > bytes_moved_threshold) { /* don't move it */ @@ -512,7 +513,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, retry: radeon_ttm_placement_from_domain(bo, domain); if (ring == R600_RING_TYPE_UVD_INDEX) - radeon_uvd_force_into_uvd_segment(bo); + radeon_uvd_force_into_uvd_segment(bo, allowed); initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 464d80145dfe..1dedadd8f5df 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -254,7 +254,8 @@ int radeon_uvd_resume(struct radeon_device *rdev) return 0; } -void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, + uint32_t allowed_domains) { int i; @@ -262,6 +263,21 @@ void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) rbo->placements[i].fpfn = 0 >> PAGE_SHIFT; rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; } + + /* If it must be in VRAM it must be in the first segment as well */ + if (allowed_domains == RADEON_GEM_DOMAIN_VRAM) + return; + + /* abort if we already have more than one placement */ + if (rbo->placement.num_placement > 1) + return; + + /* add another 256MB segment */ + rbo->placements[1] = rbo->placements[0]; + rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; + rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; + rbo->placement.num_placement++; + rbo->placement.num_busy_placement++; } void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) @@ -652,7 +668,7 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, return r; radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); - 
radeon_uvd_force_into_uvd_segment(bo); + radeon_uvd_force_into_uvd_segment(bo, RADEON_GEM_DOMAIN_VRAM); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); if (r) -- GitLab From feba9b0bcf492ba991d7fbfc211dd49ebbc95a4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 22 Aug 2014 14:25:55 +0200 Subject: [PATCH 0303/1868] drm/radeon: preallocate mem for UVD create/destroy msg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allocating memory for UVD create and destroy messages can fail, which is rather annoying when this happens in the middle of a GPU reset. Try to avoid this condition by preallocating a page for those dummy messages. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_uvd.c | 101 +++++++--------------------- 1 file changed, 26 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 1dedadd8f5df..5729e9bebd9d 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -138,7 +138,8 @@ int radeon_uvd_init(struct radeon_device *rdev) } bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + - RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE; + RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE + + RADEON_GPU_PAGE_SIZE; r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, &rdev->uvd.vcpu_bo); if (r) { @@ -647,38 +648,16 @@ int radeon_uvd_cs_parse(struct radeon_cs_parser *p) } static int radeon_uvd_send_msg(struct radeon_device *rdev, - int ring, struct radeon_bo *bo, + int ring, uint64_t addr, struct radeon_fence **fence) { - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - struct list_head head; struct radeon_ib ib; - uint64_t addr; int i, r; - memset(&tv, 0, sizeof(tv)); - tv.bo = &bo->tbo; - - INIT_LIST_HEAD(&head); - list_add(&tv.head, &head); - - r = ttm_eu_reserve_buffers(&ticket, &head); -
if (r) - return r; - - radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); - radeon_uvd_force_into_uvd_segment(bo, RADEON_GEM_DOMAIN_VRAM); - - r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - if (r) - goto err; - r = radeon_ib_get(rdev, ring, &ib, NULL, 64); if (r) - goto err; + return r; - addr = radeon_bo_gpu_offset(bo); ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); ib.ptr[1] = addr; ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); @@ -690,19 +669,11 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, ib.length_dw = 16; r = radeon_ib_schedule(rdev, &ib, NULL, false); - if (r) - goto err; - ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); if (fence) *fence = radeon_fence_ref(ib.fence); radeon_ib_free(rdev, &ib); - radeon_bo_unref(&bo); - return 0; - -err: - ttm_eu_backoff_reservation(&ticket, &head); return r; } @@ -712,27 +683,18 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence) { - struct radeon_bo *bo; - uint32_t *msg; - int r, i; + /* we use the last page of the vcpu bo for the UVD message */ + uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - + RADEON_GPU_PAGE_SIZE; - r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, &bo); - if (r) - return r; + uint32_t *msg = rdev->uvd.cpu_addr + offs; + uint64_t addr = rdev->uvd.gpu_addr + offs; - r = radeon_bo_reserve(bo, false); - if (r) { - radeon_bo_unref(&bo); - return r; - } + int r, i; - r = radeon_bo_kmap(bo, (void **)&msg); - if (r) { - radeon_bo_unreserve(bo); - radeon_bo_unref(&bo); + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); + if (r) return r; - } /* stitch together an UVD create msg */ msg[0] = cpu_to_le32(0x00000de4); @@ -749,36 +711,26 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, for (i = 11; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - radeon_bo_kunmap(bo); - 
radeon_bo_unreserve(bo); - - return radeon_uvd_send_msg(rdev, ring, bo, fence); + r = radeon_uvd_send_msg(rdev, ring, addr, fence); + radeon_bo_unreserve(rdev->uvd.vcpu_bo); + return r; } int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence) { - struct radeon_bo *bo; - uint32_t *msg; - int r, i; + /* we use the last page of the vcpu bo for the UVD message */ + uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - + RADEON_GPU_PAGE_SIZE; - r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, &bo); - if (r) - return r; + uint32_t *msg = rdev->uvd.cpu_addr + offs; + uint64_t addr = rdev->uvd.gpu_addr + offs; - r = radeon_bo_reserve(bo, false); - if (r) { - radeon_bo_unref(&bo); - return r; - } + int r, i; - r = radeon_bo_kmap(bo, (void **)&msg); - if (r) { - radeon_bo_unreserve(bo); - radeon_bo_unref(&bo); + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); + if (r) return r; - } /* stitch together an UVD destroy msg */ msg[0] = cpu_to_le32(0x00000de4); @@ -788,10 +740,9 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, for (i = 4; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - radeon_bo_kunmap(bo); - radeon_bo_unreserve(bo); - - return radeon_uvd_send_msg(rdev, ring, bo, fence); + r = radeon_uvd_send_msg(rdev, ring, addr, fence); + radeon_bo_unreserve(rdev->uvd.vcpu_bo); + return r; } /** -- GitLab From 58d06e989e1321b43c1e42bdf3846113e9e16ecd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Aug 2014 15:18:44 +0200 Subject: [PATCH 0304/1868] dmaengine: Update documentation for inline wrapper Commit 16052827d98fbc13c31ebad560af4bd53e2b4dd5 ("dmaengine/dma_slave: introduce inline wrappers") introduced some wrappers, but there is still a reference to the old function. Update the documentation to use the wrapper, and add a missing "()" to a function name. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Vinod Koul --- Documentation/dmaengine.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt index 573e28ce9751..11fb87ff6cd0 100644 --- a/Documentation/dmaengine.txt +++ b/Documentation/dmaengine.txt @@ -98,7 +98,7 @@ The slave DMA usage consists of following steps: unsigned long flags); The peripheral driver is expected to have mapped the scatterlist for - the DMA operation prior to calling device_prep_slave_sg, and must + the DMA operation prior to calling dmaengine_prep_slave_sg(), and must keep the scatterlist mapped until the DMA operation has completed. The scatterlist must be mapped using the DMA struct device. If a mapping needs to be synchronized later, dma_sync_*_for_*() must be @@ -195,5 +195,5 @@ Further APIs: Note: Not all DMA engine drivers can return reliable information for a running DMA channel. It is recommended that DMA engine users - pause or stop (via dmaengine_terminate_all) the channel before + pause or stop (via dmaengine_terminate_all()) the channel before using this API. -- GitLab From b19f40b8bf87bfc32b91260a90a7fa2cdebcd9bb Mon Sep 17 00:00:00 2001 From: Ryo Kataoka Date: Wed, 20 Aug 2014 17:53:03 -0700 Subject: [PATCH 0305/1868] dma: rcar-audmapp: Fix for no corresponding slave ID In case of no corresponding slave ID, the audmapp_set_slave() returns -ENXIO same as sh_dmae_set_slave() of shdmac.c. 
DMAEngine might return wrong channel without this patch Signed-off-by: Ryo Kataoka Signed-off-by: Jun Watanabe , Signed-off-by: Kuninori Morimoto Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-audmapp.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/dma/sh/rcar-audmapp.c b/drivers/dma/sh/rcar-audmapp.c index dabbf0aba2e9..80fd2aeb4870 100644 --- a/drivers/dma/sh/rcar-audmapp.c +++ b/drivers/dma/sh/rcar-audmapp.c @@ -117,7 +117,7 @@ static void audmapp_start_xfer(struct shdma_chan *schan, audmapp_write(auchan, chcr, PDMACHCR); } -static void audmapp_get_config(struct audmapp_chan *auchan, int slave_id, +static int audmapp_get_config(struct audmapp_chan *auchan, int slave_id, u32 *chcr, dma_addr_t *dst) { struct audmapp_device *audev = to_dev(auchan); @@ -131,20 +131,22 @@ static void audmapp_get_config(struct audmapp_chan *auchan, int slave_id, if (!pdata) { /* DT */ *chcr = ((u32)slave_id) << 16; auchan->shdma_chan.slave_id = (slave_id) >> 8; - return; + return 0; } /* non-DT */ if (slave_id >= AUDMAPP_SLAVE_NUMBER) - return; + return -ENXIO; for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++) if (cfg->slave_id == slave_id) { *chcr = cfg->chcr; *dst = cfg->dst; - break; + return 0; } + + return -ENXIO; } static int audmapp_set_slave(struct shdma_chan *schan, int slave_id, @@ -153,8 +155,11 @@ static int audmapp_set_slave(struct shdma_chan *schan, int slave_id, struct audmapp_chan *auchan = to_chan(schan); u32 chcr; dma_addr_t dst; + int ret; - audmapp_get_config(auchan, slave_id, &chcr, &dst); + ret = audmapp_get_config(auchan, slave_id, &chcr, &dst); + if (ret < 0) + return ret; if (try) return 0; -- GitLab From e34b731faa7d12d3681187968ef899747e4feb55 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 27 Aug 2014 11:55:53 +0100 Subject: [PATCH 0306/1868] dma: imx-sdma: Remove spurious __init annotation on sdma_probe() We can't annotate probe functions as __init since binding can occur at any time, not 
just during kernel init. Signed-off-by: Mark Brown Acked-by: Shawn Guo Signed-off-by: Vinod Koul --- drivers/dma/imx-sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index c615e88c118a..52ce1d21154c 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1448,7 +1448,7 @@ static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec, return dma_request_channel(mask, sdma_filter_fn, &data); } -static int __init sdma_probe(struct platform_device *pdev) +static int sdma_probe(struct platform_device *pdev) { const struct of_device_id *of_id = of_match_device(sdma_dt_ids, &pdev->dev); -- GitLab From 4ba2968420fa9d0604b6a6a5c61bfa8d0fa84ae0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 26 Aug 2014 19:12:21 -0500 Subject: [PATCH 0307/1868] percpu: Resolve ambiguities in __get_cpu_var/cpumask_var_t __get_cpu_var can paper over differences in the definitions of cpumask_var_t and either use the address of the cpumask variable directly or perform a fetch of the address of the struct cpumask allocated elsewhere. This is important particularly when using per cpu cpumask_var_t declarations because in one case we have an offset into a per cpu area to handle and in the other case we need to fetch a pointer from the offset. This patch introduces a new macro this_cpu_cpumask_var_ptr() that is defined where cpumask_var_t is defined and performs the proper actions. All use cases where __get_cpu_var is used with cpumask_var_t are converted to the use of this_cpu_cpumask_var_ptr(). 
Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/perf_event_p4.h | 2 +- arch/x86/kernel/apic/x2apic_cluster.c | 3 +-- arch/x86/oprofile/op_model_p4.c | 2 +- include/linux/cpumask.h | 11 +++++++++++ kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/rt.c | 2 +- 7 files changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 85e13ccf15c4..d725382c2ae0 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -189,7 +189,7 @@ static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP if (smp_num_siblings == 2) - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 6ce600f9bc78..1f5d5f2ffae6 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -42,8 +42,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) * We are to modify mask, so we need an own copy * and be sure it's manipulated with irq off. */ - ipi_mask_ptr = __raw_get_cpu_var(ipi_mask); - cpumask_copy(ipi_mask_ptr, mask); + ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); /* * The idea is to send one IPI per cluster. 
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 98ab13058f89..ad1d91f475ab 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -372,7 +372,7 @@ static unsigned int get_stagger(void) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 2997af6d2ccd..0a9a6da21e74 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -666,10 +666,19 @@ static inline size_t cpumask_size(void) * * This code makes NR_CPUS length memcopy and brings to a memory corruption. * cpumask_copy() provide safe copy functionality. + * + * Note that there is another evil here: If you define a cpumask_var_t + * as a percpu variable then the way to obtain the address of the cpumask + * structure differently influences what this_cpu_* operation needs to be + * used. Please use this_cpu_cpumask_var_ptr in those cases. The direct use + * of this_cpu_ptr() or this_cpu_read() will lead to failures when the + * other type of cpumask_var_t implementation is configured.
*/ #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; +#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) + bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); @@ -681,6 +690,8 @@ void free_bootmem_cpumask_var(cpumask_var_t mask); #else typedef struct cpumask cpumask_var_t[1]; +#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) + static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 255ce138b652..4a608cfaecbd 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1158,7 +1158,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); static int find_later_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl); + struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl); int this_cpu = smp_processor_id(); int best_cpu, cpu = task_cpu(task); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bfa3c86d0d68..197d659c144c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6539,7 +6539,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_group *group; struct rq *busiest; unsigned long flags; - struct cpumask *cpus = __get_cpu_var(load_balance_mask); + struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask); struct lb_env env = { .sd = sd, diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 5f6edca4fafd..a4c50fce9b90 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1526,7 +1526,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); + struct cpumask *lowest_mask = 
this_cpu_cpumask_var_ptr(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); -- GitLab From a492f075450f3ba87de36e5ffe92a9d0c7af9723 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Thu, 28 Aug 2014 08:15:21 -0600 Subject: [PATCH 0308/1868] block,scsi: fixup blk_get_request dead queue scenarios The blk_get_request function may fail in low-memory conditions or during device removal (even if __GFP_WAIT is set). To distinguish between these errors, modify the blk_get_request call stack to return the appropriate ERR_PTR. Verify that all callers check the return status and consider IS_ERR instead of a simple NULL pointer check. For consistency, make a similar change to the blk_mq_alloc_request leg of blk_get_request. It may fail if the queue is dead, or the caller was unwilling to wait. Signed-off-by: Joe Lawrence Acked-by: Jiri Kosina [for pktdvd] Acked-by: Boaz Harrosh [for osd] Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/blk-core.c | 34 ++++++++++----------- block/blk-mq.c | 8 +++-- block/bsg.c | 8 ++--- block/scsi_ioctl.c | 12 ++++---- drivers/block/paride/pd.c | 4 +-- drivers/block/pktcdvd.c | 4 +-- drivers/block/sx8.c | 2 +- drivers/cdrom/cdrom.c | 4 +-- drivers/ide/ide-park.c | 2 +- drivers/scsi/device_handler/scsi_dh_alua.c | 2 +- drivers/scsi/device_handler/scsi_dh_emc.c | 2 +- drivers/scsi/device_handler/scsi_dh_hp_sw.c | 4 +-- drivers/scsi/device_handler/scsi_dh_rdac.c | 2 +- drivers/scsi/osd/osd_initiator.c | 4 +-- drivers/scsi/osst.c | 2 +- drivers/scsi/scsi_error.c | 2 +- drivers/scsi/scsi_lib.c | 2 +- drivers/scsi/sg.c | 4 +-- drivers/scsi/st.c | 2 +- drivers/target/target_core_pscsi.c | 2 +- 20 files changed, 55 insertions(+), 51 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index c359d72e9d76..93603e6ff479 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -933,9 +933,9 @@ static struct io_context *rq_ioc(struct bio *bio) * Get a free request from @q. 
This function may fail under memory * pressure or if @q is dead. * - * Must be callled with @q->queue_lock held and, - * Returns %NULL on failure, with @q->queue_lock held. - * Returns !%NULL on success, with @q->queue_lock *not held*. + * Must be called with @q->queue_lock held and, + * Returns ERR_PTR on failure, with @q->queue_lock held. + * Returns request pointer on success, with @q->queue_lock *not held*. */ static struct request *__get_request(struct request_list *rl, int rw_flags, struct bio *bio, gfp_t gfp_mask) @@ -949,7 +949,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, int may_queue; if (unlikely(blk_queue_dying(q))) - return NULL; + return ERR_PTR(-ENODEV); may_queue = elv_may_queue(q, rw_flags); if (may_queue == ELV_MQUEUE_NO) @@ -974,7 +974,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, * process is not a "batcher", and not * exempted by the IO scheduler */ - return NULL; + return ERR_PTR(-ENOMEM); } } } @@ -992,7 +992,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, * allocated with any setting of ->nr_requests */ if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) - return NULL; + return ERR_PTR(-ENOMEM); q->nr_rqs[is_sync]++; rl->count[is_sync]++; @@ -1097,7 +1097,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, rq_starved: if (unlikely(rl->count[is_sync] == 0)) rl->starved[is_sync] = 1; - return NULL; + return ERR_PTR(-ENOMEM); } /** @@ -1110,9 +1110,9 @@ static struct request *__get_request(struct request_list *rl, int rw_flags, * Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this * function keeps retrying under memory pressure and fails iff @q is dead. * - * Must be callled with @q->queue_lock held and, - * Returns %NULL on failure, with @q->queue_lock held. - * Returns !%NULL on success, with @q->queue_lock *not held*. 
+ * Must be called with @q->queue_lock held and, + * Returns ERR_PTR on failure, with @q->queue_lock held. + * Returns request pointer on success, with @q->queue_lock *not held*. */ static struct request *get_request(struct request_queue *q, int rw_flags, struct bio *bio, gfp_t gfp_mask) @@ -1125,12 +1125,12 @@ static struct request *get_request(struct request_queue *q, int rw_flags, rl = blk_get_rl(q, bio); /* transferred to @rq on success */ retry: rq = __get_request(rl, rw_flags, bio, gfp_mask); - if (rq) + if (!IS_ERR(rq)) return rq; if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) { blk_put_rl(rl); - return NULL; + return rq; } /* wait on @rl and retry */ @@ -1167,7 +1167,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, spin_lock_irq(q->queue_lock); rq = get_request(q, rw, NULL, gfp_mask); - if (!rq) + if (IS_ERR(rq)) spin_unlock_irq(q->queue_lock); /* q->queue_lock is unlocked at this point */ @@ -1219,8 +1219,8 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio, { struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); - if (unlikely(!rq)) - return ERR_PTR(-ENOMEM); + if (IS_ERR(rq)) + return rq; blk_rq_set_block_pc(rq); @@ -1615,8 +1615,8 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) * Returns with the queue unlocked. 
*/ req = get_request(q, rw_flags, bio, GFP_NOIO); - if (unlikely(!req)) { - bio_endio(bio, -ENODEV); /* @q is dead */ + if (IS_ERR(req)) { + bio_endio(bio, PTR_ERR(req)); /* @q is dead */ goto out_unlock; } diff --git a/block/blk-mq.c b/block/blk-mq.c index 5189cb1e478a..940aa8a34b70 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -218,9 +218,11 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, struct blk_mq_hw_ctx *hctx; struct request *rq; struct blk_mq_alloc_data alloc_data; + int ret; - if (blk_mq_queue_enter(q)) - return NULL; + ret = blk_mq_queue_enter(q); + if (ret) + return ERR_PTR(ret); ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); @@ -240,6 +242,8 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, ctx = alloc_data.ctx; } blk_mq_put_ctx(ctx); + if (!rq) + return ERR_PTR(-EWOULDBLOCK); return rq; } EXPORT_SYMBOL(blk_mq_alloc_request); diff --git a/block/bsg.c b/block/bsg.c index ff46addde5d8..73c78fd12cc1 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -270,8 +270,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, * map scatter-gather elements separately and string them to request */ rq = blk_get_request(q, rw, GFP_KERNEL); - if (!rq) - return ERR_PTR(-ENOMEM); + if (IS_ERR(rq)) + return rq; blk_rq_set_block_pc(rq); ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm); @@ -285,8 +285,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, } next_rq = blk_get_request(q, READ, GFP_KERNEL); - if (!next_rq) { - ret = -ENOMEM; + if (IS_ERR(next_rq)) { + ret = PTR_ERR(next_rq); goto out; } rq->next_rq = next_rq; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 29d056782833..a8b0d0208448 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -318,8 +318,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, at_head = 1; rq = blk_get_request(q, writing ? 
WRITE : READ, GFP_KERNEL); - if (!rq) - return -ENOMEM; + if (IS_ERR(rq)) + return PTR_ERR(rq); blk_rq_set_block_pc(rq); if (blk_fill_sghdr_rq(q, rq, hdr, mode)) { @@ -448,8 +448,8 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, } rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); - if (!rq) { - err = -ENODEV; + if (IS_ERR(rq)) { + err = PTR_ERR(rq); goto error_free_buffer; } @@ -539,8 +539,8 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, int err; rq = blk_get_request(q, WRITE, __GFP_WAIT); - if (!rq) - return -ENODEV; + if (IS_ERR(rq)) + return PTR_ERR(rq); blk_rq_set_block_pc(rq); rq->timeout = BLK_DEFAULT_SG_TIMEOUT; rq->cmd[0] = cmd; diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index ca831f741d89..d48715b287e6 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -722,8 +722,8 @@ static int pd_special_command(struct pd_unit *disk, int err = 0; rq = blk_get_request(disk->gd->queue, READ, __GFP_WAIT); - if (!rq) - return -ENODEV; + if (IS_ERR(rq)) + return PTR_ERR(rq); rq->cmd_type = REQ_TYPE_SPECIAL; rq->special = func; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 7fa8c80e8982..09e628dafd9d 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -704,8 +704,8 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? 
WRITE : READ, __GFP_WAIT); - if (!rq) - return -ENODEV; + if (IS_ERR(rq)) + return PTR_ERR(rq); blk_rq_set_block_pc(rq); if (cgc->buflen) { diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index d5e2d12b9d9e..5d552857de41 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -568,7 +568,7 @@ static struct carm_request *carm_get_special(struct carm_host *host) return NULL; rq = blk_get_request(host->oob_q, WRITE /* bogus */, GFP_KERNEL); - if (!rq) { + if (IS_ERR(rq)) { spin_lock_irqsave(&host->lock, flags); carm_put_request(host, crq); spin_unlock_irqrestore(&host->lock, flags); diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 898b84bba28a..5d28a45d2960 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2180,8 +2180,8 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; rq = blk_get_request(q, READ, GFP_KERNEL); - if (!rq) { - ret = -ENOMEM; + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); break; } blk_rq_set_block_pc(rq); diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index f41558a0bcd1..ca958604cda2 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -46,7 +46,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) * timeout has expired, so power management will be reenabled. 
*/ rq = blk_get_request(q, READ, GFP_NOWAIT); - if (unlikely(!rq)) + if (IS_ERR(rq)) goto out; rq->cmd[0] = REQ_UNPARK_HEADS; diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 7bcf67eec921..e99507ed0e3c 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -115,7 +115,7 @@ static struct request *get_alua_req(struct scsi_device *sdev, rq = blk_get_request(q, rw, GFP_NOIO); - if (!rq) { + if (IS_ERR(rq)) { sdev_printk(KERN_INFO, sdev, "%s: blk_get_request failed\n", __func__); return NULL; diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index 6f07f7fe3aa1..84765384c47c 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -275,7 +275,7 @@ static struct request *get_req(struct scsi_device *sdev, int cmd, rq = blk_get_request(sdev->request_queue, (cmd != INQUIRY) ? WRITE : READ, GFP_NOIO); - if (!rq) { + if (IS_ERR(rq)) { sdev_printk(KERN_INFO, sdev, "get_req: blk_get_request failed"); return NULL; } diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index e9d9fea9e272..4ee2759f5299 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -117,7 +117,7 @@ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h) retry: req = blk_get_request(sdev->request_queue, WRITE, GFP_NOIO); - if (!req) + if (IS_ERR(req)) return SCSI_DH_RES_TEMP_UNAVAIL; blk_rq_set_block_pc(req); @@ -247,7 +247,7 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *h) struct request *req; req = blk_get_request(h->sdev->request_queue, WRITE, GFP_ATOMIC); - if (!req) + if (IS_ERR(req)) return SCSI_DH_RES_TEMP_UNAVAIL; blk_rq_set_block_pc(req); diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 
826069db9848..1b5bc9293e37 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -274,7 +274,7 @@ static struct request *get_rdac_req(struct scsi_device *sdev, rq = blk_get_request(q, rw, GFP_NOIO); - if (!rq) { + if (IS_ERR(rq)) { sdev_printk(KERN_INFO, sdev, "get_rdac_req: blk_get_request failed.\n"); return NULL; diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index 5f4cbf0c4759..fd19fd8468ac 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -1567,8 +1567,8 @@ static struct request *_make_request(struct request_queue *q, bool has_write, struct request *req; req = blk_get_request(q, has_write ? WRITE : READ, flags); - if (unlikely(!req)) - return ERR_PTR(-ENOMEM); + if (IS_ERR(req)) + return req; blk_rq_set_block_pc(req); return req; diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index 0727ea7cc387..dff37a250d79 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -362,7 +362,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd, int write = (data_direction == DMA_TO_DEVICE); req = blk_get_request(SRpnt->stp->device->request_queue, write, GFP_KERNEL); - if (!req) + if (IS_ERR(req)) return DRIVER_ERROR << 24; blk_rq_set_block_pc(req); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 4c433bf47a06..a2c3d3d255a1 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1960,7 +1960,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) * request becomes available */ req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); - if (!req) + if (IS_ERR(req)) return; blk_rq_set_block_pc(req); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ce62e8798cc8..972d0a8adf2e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -221,7 +221,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int ret = 
DRIVER_ERROR << 24; req = blk_get_request(sdev->request_queue, write, __GFP_WAIT); - if (!req) + if (IS_ERR(req)) return ret; blk_rq_set_block_pc(req); diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 01cf88888797..60354449d9ed 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1711,9 +1711,9 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) } rq = blk_get_request(q, rw, GFP_ATOMIC); - if (!rq) { + if (IS_ERR(rq)) { kfree(long_cmdp); - return -ENOMEM; + return PTR_ERR(rq); } blk_rq_set_block_pc(rq); diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index aff9689de0f7..59db5bfc11db 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -490,7 +490,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, req = blk_get_request(SRpnt->stp->device->request_queue, write, GFP_KERNEL); - if (!req) + if (IS_ERR(req)) return DRIVER_ERROR << 24; blk_rq_set_block_pc(req); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 943b1dbe859a..70d9f6dabba0 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1050,7 +1050,7 @@ pscsi_execute_cmd(struct se_cmd *cmd) req = blk_get_request(pdv->pdv_sd->request_queue, (data_direction == DMA_TO_DEVICE), GFP_KERNEL); - if (!req) { + if (IS_ERR(req)) { pr_err("PSCSI: blk_get_request() failed\n"); ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; goto fail; -- GitLab From 55872c5a3c01f0fe7b5298d19e24e237f5b5ff06 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 28 Aug 2014 15:05:40 -0600 Subject: [PATCH 0309/1868] bsg: fix potential error pointer dereference Dan writes: block/bsg.c:327 bsg_map_hdr() error: 'next_rq' dereferencing possible ERR_PTR(). Fix this by setting next_rq to NULL, for the case where it can be != NULL but an error pointer. 
Reported-by: Dan Carpenter Signed-off-by: Jens Axboe --- block/bsg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/bsg.c b/block/bsg.c index 73c78fd12cc1..276e869e686c 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -287,6 +287,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, next_rq = blk_get_request(q, READ, GFP_KERNEL); if (IS_ERR(next_rq)) { ret = PTR_ERR(next_rq); + next_rq = NULL; goto out; } rq->next_rq = next_rq; -- GitLab From d0b3c3b6c26c6eed1ba3fa37242dfc8942b5e997 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 21 Jan 2014 13:00:24 +0100 Subject: [PATCH 0310/1868] drm/nouveau: add reservation to nouveau_gem_ioctl_cpu_prep Apart from some code inside ttm itself and nouveau_bo_vma_del, this is the only place where ttm_bo_wait is used without a reservation. Fix this so we can remove the fence_lock later on. After the switch to rcu the reservation lock will be removed again. Signed-off-by: Maarten Lankhorst Acked-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_gem.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 292a677bfed4..0054315eb879 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -884,17 +884,31 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, struct drm_gem_object *gem; struct nouveau_bo *nvbo; bool no_wait = !!(req->flags & NOUVEAU_GEM_CPU_PREP_NOWAIT); - int ret = -EINVAL; + int ret; + struct nouveau_fence *fence = NULL; gem = drm_gem_object_lookup(dev, file_priv, req->handle); if (!gem) return -ENOENT; nvbo = nouveau_gem_object(gem); - spin_lock(&nvbo->bo.bdev->fence_lock); - ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait); - spin_unlock(&nvbo->bo.bdev->fence_lock); + ret = ttm_bo_reserve(&nvbo->bo, true, false, false, NULL); + if (!ret) { + spin_lock(&nvbo->bo.bdev->fence_lock); + ret = 
ttm_bo_wait(&nvbo->bo, true, true, true); + if (!no_wait && ret) + fence = nouveau_fence_ref(nvbo->bo.sync_obj); + spin_unlock(&nvbo->bo.bdev->fence_lock); + + ttm_bo_unreserve(&nvbo->bo); + } drm_gem_object_unreference_unlocked(gem); + + if (fence) { + ret = nouveau_fence_wait(fence, true, no_wait); + nouveau_fence_unref(&fence); + } + return ret; } -- GitLab From bdaf7ddf652ef51fd363b052e320711c06f6f553 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 21 Jan 2014 13:02:14 +0100 Subject: [PATCH 0311/1868] drm/nouveau: require reservations for nouveau_fence_sync and nouveau_bo_fence This will ensure we always hold the required lock when calling those functions. Signed-off-by: Maarten Lankhorst Acked-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 ++ drivers/gpu/drm/nouveau/nouveau_display.c | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 0591ca0734e3..ed966f51e29b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1463,6 +1463,8 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence) struct nouveau_fence *new_fence = nouveau_fence_ref(fence); struct nouveau_fence *old_fence = NULL; + lockdep_assert_held(&nvbo->bo.resv->lock.base); + spin_lock(&nvbo->bo.bdev->fence_lock); old_fence = nvbo->bo.sync_obj; nvbo->bo.sync_obj = new_fence; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 65b4fd53dd4e..54b1f3d8fc7f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -717,6 +717,9 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, } mutex_lock(&cli->mutex); + ret = ttm_bo_reserve(&new_bo->bo, true, false, false, NULL); + if (ret) + goto fail_unpin; /* synchronise rendering channel with the kernel's channel */ 
spin_lock(&new_bo->bo.bdev->fence_lock); @@ -724,12 +727,18 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, spin_unlock(&new_bo->bo.bdev->fence_lock); ret = nouveau_fence_sync(fence, chan); nouveau_fence_unref(&fence); - if (ret) + if (ret) { + ttm_bo_unreserve(&new_bo->bo); goto fail_unpin; + } - ret = ttm_bo_reserve(&old_bo->bo, true, false, false, NULL); - if (ret) - goto fail_unpin; + if (new_bo != old_bo) { + ttm_bo_unreserve(&new_bo->bo); + + ret = ttm_bo_reserve(&old_bo->bo, true, false, false, NULL); + if (ret) + goto fail_unpin; + } /* Initialize a page flip struct */ *s = (struct nouveau_page_flip_state) -- GitLab From 7040138ff85501931138970663a988f48c0666f0 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 21 Jan 2014 13:07:01 +0100 Subject: [PATCH 0312/1868] drm/ttm: call ttm_bo_wait while inside a reservation This is the last remaining function that doesn't use the reservation lock completely to fence off access to a buffer. Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/ttm/ttm_bo.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index b992ec3c318a..4f1bc948bda0 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -503,17 +503,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, if (ret) return ret; - /* - * remove sync_obj with ttm_bo_wait, the wait should be - * finished, and no new wait object should have been added. 
- */ - spin_lock(&bdev->fence_lock); - ret = ttm_bo_wait(bo, false, false, true); - WARN_ON(ret); - spin_unlock(&bdev->fence_lock); - if (ret) - return ret; - spin_lock(&glob->lru_lock); ret = __ttm_bo_reserve(bo, false, true, false, NULL); @@ -529,8 +518,16 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, spin_unlock(&glob->lru_lock); return 0; } - } else - spin_unlock(&bdev->fence_lock); + + /* + * remove sync_obj with ttm_bo_wait, the wait should be + * finished, and no new wait object should have been added. + */ + spin_lock(&bdev->fence_lock); + ret = ttm_bo_wait(bo, false, false, true); + WARN_ON(ret); + } + spin_unlock(&bdev->fence_lock); if (ret || unlikely(list_empty(&bo->ddestroy))) { __ttm_bo_unreserve(bo); @@ -1524,6 +1521,8 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, void *sync_obj; int ret = 0; + lockdep_assert_held(&bo->resv->lock.base); + if (likely(bo->sync_obj == NULL)) return 0; -- GitLab From dd7cfd641228abb2669d8d047d5ec377b1835900 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 21 Jan 2014 13:07:31 +0100 Subject: [PATCH 0313/1868] drm/ttm: kill fence_lock No users are left, kill it off! :D Conversion to the reservation api is next on the list, after that the functionality can be restored with rcu. 
Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/nouveau/nouveau_bo.c | 25 +++----- drivers/gpu/drm/nouveau/nouveau_display.c | 6 +- drivers/gpu/drm/nouveau/nouveau_gem.c | 16 +---- drivers/gpu/drm/qxl/qxl_cmd.c | 2 - drivers/gpu/drm/qxl/qxl_fence.c | 4 -- drivers/gpu/drm/qxl/qxl_object.h | 2 - drivers/gpu/drm/qxl/qxl_release.c | 2 - drivers/gpu/drm/radeon/radeon_display.c | 7 +-- drivers/gpu/drm/radeon/radeon_object.c | 2 - drivers/gpu/drm/ttm/ttm_bo.c | 75 ++++++----------------- drivers/gpu/drm/ttm/ttm_bo_util.c | 5 -- drivers/gpu/drm/ttm/ttm_bo_vm.c | 3 - drivers/gpu/drm/ttm/ttm_execbuf_util.c | 2 - drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 4 -- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 16 ++--- include/drm/ttm/ttm_bo_api.h | 5 +- include/drm/ttm/ttm_bo_driver.h | 3 - 17 files changed, 37 insertions(+), 142 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index ed966f51e29b..8d8e5f6340d0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1212,9 +1212,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr, } /* Fallback to software copy. 
*/ - spin_lock(&bo->bdev->fence_lock); ret = ttm_bo_wait(bo, true, intr, no_wait_gpu); - spin_unlock(&bo->bdev->fence_lock); if (ret == 0) ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); @@ -1457,26 +1455,19 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) ttm_pool_unpopulate(ttm); } +static void +nouveau_bo_fence_unref(void **sync_obj) +{ + nouveau_fence_unref((struct nouveau_fence **)sync_obj); +} + void nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence) { - struct nouveau_fence *new_fence = nouveau_fence_ref(fence); - struct nouveau_fence *old_fence = NULL; - lockdep_assert_held(&nvbo->bo.resv->lock.base); - spin_lock(&nvbo->bo.bdev->fence_lock); - old_fence = nvbo->bo.sync_obj; - nvbo->bo.sync_obj = new_fence; - spin_unlock(&nvbo->bo.bdev->fence_lock); - - nouveau_fence_unref(&old_fence); -} - -static void -nouveau_bo_fence_unref(void **sync_obj) -{ - nouveau_fence_unref((struct nouveau_fence **)sync_obj); + nouveau_bo_fence_unref(&nvbo->bo.sync_obj); + nvbo->bo.sync_obj = nouveau_fence_ref(fence); } static void * diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 54b1f3d8fc7f..e6867b9ebb46 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -722,11 +722,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, goto fail_unpin; /* synchronise rendering channel with the kernel's channel */ - spin_lock(&new_bo->bo.bdev->fence_lock); - fence = nouveau_fence_ref(new_bo->bo.sync_obj); - spin_unlock(&new_bo->bo.bdev->fence_lock); - ret = nouveau_fence_sync(fence, chan); - nouveau_fence_unref(&fence); + ret = nouveau_fence_sync(new_bo->bo.sync_obj, chan); if (ret) { ttm_bo_unreserve(&new_bo->bo); goto fail_unpin; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 0054315eb879..1650c0bdb0fc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ 
b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -103,9 +103,7 @@ nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nouveau_vma *vma) list_del(&vma->head); if (mapped) { - spin_lock(&nvbo->bo.bdev->fence_lock); fence = nouveau_fence_ref(nvbo->bo.sync_obj); - spin_unlock(&nvbo->bo.bdev->fence_lock); } if (fence) { @@ -430,17 +428,11 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv, static int validate_sync(struct nouveau_channel *chan, struct nouveau_bo *nvbo) { - struct nouveau_fence *fence = NULL; + struct nouveau_fence *fence = nvbo->bo.sync_obj; int ret = 0; - spin_lock(&nvbo->bo.bdev->fence_lock); - fence = nouveau_fence_ref(nvbo->bo.sync_obj); - spin_unlock(&nvbo->bo.bdev->fence_lock); - - if (fence) { + if (fence) ret = nouveau_fence_sync(fence, chan); - nouveau_fence_unref(&fence); - } return ret; } @@ -659,9 +651,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, data |= r->vor; } - spin_lock(&nvbo->bo.bdev->fence_lock); ret = ttm_bo_wait(&nvbo->bo, false, false, false); - spin_unlock(&nvbo->bo.bdev->fence_lock); if (ret) { NV_PRINTK(error, cli, "reloc wait_idle failed: %d\n", ret); break; @@ -894,11 +884,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, ret = ttm_bo_reserve(&nvbo->bo, true, false, false, NULL); if (!ret) { - spin_lock(&nvbo->bo.bdev->fence_lock); ret = ttm_bo_wait(&nvbo->bo, true, true, true); if (!no_wait && ret) fence = nouveau_fence_ref(nvbo->bo.sync_obj); - spin_unlock(&nvbo->bo.bdev->fence_lock); ttm_bo_unreserve(&nvbo->bo); } diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index eb89653a7a17..45fad7b45486 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -628,9 +628,7 @@ static int qxl_reap_surf(struct qxl_device *qdev, struct qxl_bo *surf, bool stal if (stall) mutex_unlock(&qdev->surf_evict_mutex); - spin_lock(&surf->tbo.bdev->fence_lock); ret = ttm_bo_wait(&surf->tbo, true, true, !stall); - 
spin_unlock(&surf->tbo.bdev->fence_lock); if (stall) mutex_lock(&qdev->surf_evict_mutex); diff --git a/drivers/gpu/drm/qxl/qxl_fence.c b/drivers/gpu/drm/qxl/qxl_fence.c index ae59e91cfb9a..c7248418117d 100644 --- a/drivers/gpu/drm/qxl/qxl_fence.c +++ b/drivers/gpu/drm/qxl/qxl_fence.c @@ -60,9 +60,6 @@ int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id) { void *ret; int retval = 0; - struct qxl_bo *bo = container_of(qfence, struct qxl_bo, fence); - - spin_lock(&bo->tbo.bdev->fence_lock); ret = radix_tree_delete(&qfence->tree, rel_id); if (ret == qfence) @@ -71,7 +68,6 @@ int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id) DRM_DEBUG("didn't find fence in radix tree for %d\n", rel_id); retval = -ENOENT; } - spin_unlock(&bo->tbo.bdev->fence_lock); return retval; } diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h index 83a423293afd..1edaf5768086 100644 --- a/drivers/gpu/drm/qxl/qxl_object.h +++ b/drivers/gpu/drm/qxl/qxl_object.h @@ -76,12 +76,10 @@ static inline int qxl_bo_wait(struct qxl_bo *bo, u32 *mem_type, } return r; } - spin_lock(&bo->tbo.bdev->fence_lock); if (mem_type) *mem_type = bo->tbo.mem.mem_type; if (bo->tbo.sync_obj) r = ttm_bo_wait(&bo->tbo, true, true, no_wait); - spin_unlock(&bo->tbo.bdev->fence_lock); ttm_bo_unreserve(&bo->tbo); return r; } diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 14e776f1d14e..2e5e38fee9b2 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -337,7 +337,6 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) glob = bo->glob; spin_lock(&glob->lru_lock); - spin_lock(&bdev->fence_lock); list_for_each_entry(entry, &release->bos, head) { bo = entry->bo; @@ -352,7 +351,6 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) __ttm_bo_unreserve(bo); entry->reserved = false; } - spin_unlock(&bdev->fence_lock); spin_unlock(&glob->lru_lock); 
ww_acquire_fini(&release->ticket); } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index bd0d687379ee..7d0a7abdab2a 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -476,11 +476,6 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, obj = new_radeon_fb->obj; new_rbo = gem_to_radeon_bo(obj); - spin_lock(&new_rbo->tbo.bdev->fence_lock); - if (new_rbo->tbo.sync_obj) - work->fence = radeon_fence_ref(new_rbo->tbo.sync_obj); - spin_unlock(&new_rbo->tbo.bdev->fence_lock); - /* pin the new buffer */ DRM_DEBUG_DRIVER("flip-ioctl() cur_rbo = %p, new_rbo = %p\n", work->old_rbo, new_rbo); @@ -499,6 +494,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, DRM_ERROR("failed to pin new rbo buffer before flip\n"); goto cleanup; } + work->fence = radeon_fence_ref(new_rbo->tbo.sync_obj); radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL); radeon_bo_unreserve(new_rbo); @@ -582,7 +578,6 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); radeon_fence_unref(&work->fence); kfree(work); - return r; } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index c97a42432e2b..cbac963571c0 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -779,12 +779,10 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait) r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL); if (unlikely(r != 0)) return r; - spin_lock(&bo->tbo.bdev->fence_lock); if (mem_type) *mem_type = bo->tbo.mem.mem_type; if (bo->tbo.sync_obj) r = ttm_bo_wait(&bo->tbo, true, true, no_wait); - spin_unlock(&bo->tbo.bdev->fence_lock); ttm_bo_unreserve(&bo->tbo); return r; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 4f1bc948bda0..195386f16ca4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ 
b/drivers/gpu/drm/ttm/ttm_bo.c @@ -415,24 +415,20 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) spin_lock(&glob->lru_lock); ret = __ttm_bo_reserve(bo, false, true, false, NULL); - spin_lock(&bdev->fence_lock); - (void) ttm_bo_wait(bo, false, false, true); - if (!ret && !bo->sync_obj) { - spin_unlock(&bdev->fence_lock); - put_count = ttm_bo_del_from_lru(bo); + if (!ret) { + (void) ttm_bo_wait(bo, false, false, true); - spin_unlock(&glob->lru_lock); - ttm_bo_cleanup_memtype_use(bo); + if (!bo->sync_obj) { + put_count = ttm_bo_del_from_lru(bo); - ttm_bo_list_ref_sub(bo, put_count, true); + spin_unlock(&glob->lru_lock); + ttm_bo_cleanup_memtype_use(bo); - return; - } - if (bo->sync_obj) - sync_obj = driver->sync_obj_ref(bo->sync_obj); - spin_unlock(&bdev->fence_lock); + ttm_bo_list_ref_sub(bo, put_count, true); - if (!ret) { + return; + } + sync_obj = driver->sync_obj_ref(bo->sync_obj); /* * Make NO_EVICT bos immediately available to @@ -481,7 +477,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, int put_count; int ret; - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, false, true); if (ret && !no_wait_gpu) { @@ -493,7 +488,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, * no new sync objects can be attached. */ sync_obj = driver->sync_obj_ref(bo->sync_obj); - spin_unlock(&bdev->fence_lock); __ttm_bo_unreserve(bo); spin_unlock(&glob->lru_lock); @@ -523,11 +517,9 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, * remove sync_obj with ttm_bo_wait, the wait should be * finished, and no new wait object should have been added. 
*/ - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, false, true); WARN_ON(ret); } - spin_unlock(&bdev->fence_lock); if (ret || unlikely(list_empty(&bo->ddestroy))) { __ttm_bo_unreserve(bo); @@ -665,9 +657,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, struct ttm_placement placement; int ret = 0; - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) { if (ret != -ERESTARTSYS) { @@ -958,7 +948,6 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo, { int ret = 0; struct ttm_mem_reg mem; - struct ttm_bo_device *bdev = bo->bdev; lockdep_assert_held(&bo->resv->lock.base); @@ -967,9 +956,7 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo, * Have the driver move function wait for idle when necessary, * instead of doing it here. */ - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bdev->fence_lock); if (ret) return ret; mem.num_pages = bo->num_pages; @@ -1459,7 +1446,6 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, bdev->glob = glob; bdev->need_dma32 = need_dma32; bdev->val_seq = 0; - spin_lock_init(&bdev->fence_lock); mutex_lock(&glob->device_list_mutex); list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&glob->device_list_mutex); @@ -1517,7 +1503,6 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, bool lazy, bool interruptible, bool no_wait) { struct ttm_bo_driver *driver = bo->bdev->driver; - struct ttm_bo_device *bdev = bo->bdev; void *sync_obj; int ret = 0; @@ -1526,53 +1511,33 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, if (likely(bo->sync_obj == NULL)) return 0; - while (bo->sync_obj) { - + if (bo->sync_obj) { if (driver->sync_obj_signaled(bo->sync_obj)) { - void *tmp_obj = bo->sync_obj; - bo->sync_obj = NULL; + driver->sync_obj_unref(&bo->sync_obj); clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - 
spin_unlock(&bdev->fence_lock); - driver->sync_obj_unref(&tmp_obj); - spin_lock(&bdev->fence_lock); - continue; + return 0; } if (no_wait) return -EBUSY; sync_obj = driver->sync_obj_ref(bo->sync_obj); - spin_unlock(&bdev->fence_lock); ret = driver->sync_obj_wait(sync_obj, lazy, interruptible); - if (unlikely(ret != 0)) { - driver->sync_obj_unref(&sync_obj); - spin_lock(&bdev->fence_lock); - return ret; - } - spin_lock(&bdev->fence_lock); - if (likely(bo->sync_obj == sync_obj)) { - void *tmp_obj = bo->sync_obj; - bo->sync_obj = NULL; + + if (likely(ret == 0)) { clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bdev->fence_lock); - driver->sync_obj_unref(&sync_obj); - driver->sync_obj_unref(&tmp_obj); - spin_lock(&bdev->fence_lock); - } else { - spin_unlock(&bdev->fence_lock); - driver->sync_obj_unref(&sync_obj); - spin_lock(&bdev->fence_lock); + driver->sync_obj_unref(&bo->sync_obj); } + driver->sync_obj_unref(&sync_obj); } - return 0; + return ret; } EXPORT_SYMBOL(ttm_bo_wait); int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait) { - struct ttm_bo_device *bdev = bo->bdev; int ret = 0; /* @@ -1582,9 +1547,7 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait) ret = ttm_bo_reserve(bo, true, no_wait, false, NULL); if (unlikely(ret != 0)) return ret; - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, true, no_wait); - spin_unlock(&bdev->fence_lock); if (likely(ret == 0)) atomic_inc(&bo->cpu_writers); ttm_bo_unreserve(bo); @@ -1641,9 +1604,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) * Wait for GPU, then move to system cached. 
*/ - spin_lock(&bo->bdev->fence_lock); ret = ttm_bo_wait(bo, false, false, false); - spin_unlock(&bo->bdev->fence_lock); if (unlikely(ret != 0)) goto out; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 30e5d90cb7bc..495aebf0f9c3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -466,12 +466,10 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, drm_vma_node_reset(&fbo->vma_node); atomic_set(&fbo->cpu_writers, 0); - spin_lock(&bdev->fence_lock); if (bo->sync_obj) fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj); else fbo->sync_obj = NULL; - spin_unlock(&bdev->fence_lock); kref_init(&fbo->list_kref); kref_init(&fbo->kref); fbo->destroy = &ttm_transfered_destroy; @@ -657,7 +655,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, struct ttm_buffer_object *ghost_obj; void *tmp_obj = NULL; - spin_lock(&bdev->fence_lock); if (bo->sync_obj) { tmp_obj = bo->sync_obj; bo->sync_obj = NULL; @@ -665,7 +662,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, bo->sync_obj = driver->sync_obj_ref(sync_obj); if (evict) { ret = ttm_bo_wait(bo, false, false, false); - spin_unlock(&bdev->fence_lock); if (tmp_obj) driver->sync_obj_unref(&tmp_obj); if (ret) @@ -688,7 +684,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, */ set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bdev->fence_lock); if (tmp_obj) driver->sync_obj_unref(&tmp_obj); diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 0ce48e5a9cb4..d05437f219e9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -45,10 +45,8 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, struct vm_area_struct *vma, struct vm_fault *vmf) { - struct ttm_bo_device *bdev = bo->bdev; int ret = 0; - spin_lock(&bdev->fence_lock); if (likely(!test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags))) goto out_unlock; @@ 
-82,7 +80,6 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, VM_FAULT_NOPAGE; out_unlock: - spin_unlock(&bdev->fence_lock); return ret; } diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index e8dac8758528..0fbbbbd67afc 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -217,7 +217,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, glob = bo->glob; spin_lock(&glob->lru_lock); - spin_lock(&bdev->fence_lock); list_for_each_entry(entry, list, head) { bo = entry->bo; @@ -227,7 +226,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, __ttm_bo_unreserve(bo); entry->reserved = false; } - spin_unlock(&bdev->fence_lock); spin_unlock(&glob->lru_lock); if (ticket) ww_acquire_fini(ticket); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c index 37c093c0c7b8..c133b3d10de8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c @@ -863,11 +863,7 @@ static void vmw_move_notify(struct ttm_buffer_object *bo, */ static void vmw_swap_notify(struct ttm_buffer_object *bo) { - struct ttm_bo_device *bdev = bo->bdev; - - spin_lock(&bdev->fence_lock); ttm_bo_wait(bo, false, false, false); - spin_unlock(&bdev->fence_lock); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index a432c0db257c..1ee86bf82750 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -567,12 +567,13 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo, int ret; if (flags & drm_vmw_synccpu_allow_cs) { - struct ttm_bo_device *bdev = bo->bdev; + bool nonblock = !!(flags & drm_vmw_synccpu_dontblock); - spin_lock(&bdev->fence_lock); - ret = ttm_bo_wait(bo, false, true, - !!(flags & drm_vmw_synccpu_dontblock)); - spin_unlock(&bdev->fence_lock); + ret = ttm_bo_reserve(bo, true, 
nonblock, false, NULL); + if (!ret) { + ret = ttm_bo_wait(bo, false, true, nonblock); + ttm_bo_unreserve(bo); + } return ret; } @@ -1429,12 +1430,10 @@ void vmw_fence_single_bo(struct ttm_buffer_object *bo, else driver->sync_obj_ref(fence); - spin_lock(&bdev->fence_lock); old_fence_obj = bo->sync_obj; bo->sync_obj = fence; - spin_unlock(&bdev->fence_lock); if (old_fence_obj) vmw_fence_obj_unreference(&old_fence_obj); @@ -1475,7 +1474,6 @@ void vmw_resource_move_notify(struct ttm_buffer_object *bo, if (mem->mem_type != VMW_PL_MOB) { struct vmw_resource *res, *n; - struct ttm_bo_device *bdev = bo->bdev; struct ttm_validate_buffer val_buf; val_buf.bo = bo; @@ -1491,9 +1489,7 @@ void vmw_resource_move_notify(struct ttm_buffer_object *bo, list_del_init(&res->mob_head); } - spin_lock(&bdev->fence_lock); (void) ttm_bo_wait(bo, false, false, false); - spin_unlock(&bdev->fence_lock); } } diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index e3d39c80a091..5805f4a49478 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -237,10 +237,7 @@ struct ttm_buffer_object { struct list_head io_reserve_lru; /** - * Members protected by struct buffer_object_device::fence_lock - * In addition, setting sync_obj to anything else - * than NULL requires bo::reserved to be held. This allows for - * checking NULL while reserved but not holding the mentioned lock. + * Members protected by a bo reservation. */ void *sync_obj; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 5c8bb5699a6f..e1ee141e26cc 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -521,8 +521,6 @@ struct ttm_bo_global { * * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. - * @fence_lock: Protects the synchronizing members on *all* bos belonging - * to this device. 
* @vma_manager: Address space manager * lru_lock: Spinlock that protects the buffer+device lru lists and * ddestroy lists. @@ -542,7 +540,6 @@ struct ttm_bo_device { struct ttm_bo_global *glob; struct ttm_bo_driver *driver; struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; - spinlock_t fence_lock; /* * Protected by internal locks. -- GitLab From 58b4d720c1620bbf09e42b4f218dcb2d0d8cdf3e Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 9 Jan 2014 11:03:08 +0100 Subject: [PATCH 0314/1868] drm/ttm: add interruptible parameter to ttm_eu_reserve_buffers It seems some drivers really want this as a parameter, like vmwgfx. Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/qxl/qxl_release.c | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 2 +- drivers/gpu/drm/radeon/radeon_vm.c | 2 +- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 22 +++++++++++++--------- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 7 ++----- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 2 +- include/drm/ttm/ttm_execbuf_util.h | 9 +++++---- 7 files changed, 24 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 2e5e38fee9b2..656f9d3a946d 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -159,7 +159,7 @@ int qxl_release_reserve_list(struct qxl_release *release, bool no_intr) if (list_is_singular(&release->bos)) return 0; - ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos); + ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos, !no_intr); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index cbac963571c0..378fe9ea4d44 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -482,7 +482,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, u64 bytes_moved = 0, initial_bytes_moved; u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev); - r = 
ttm_eu_reserve_buffers(ticket, head); + r = ttm_eu_reserve_buffers(ticket, head, true); if (unlikely(r != 0)) { return r; } diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 4751c6728fe9..3d9a6a036f8a 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -399,7 +399,7 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev, INIT_LIST_HEAD(&head); list_add(&tv.head, &head); - r = ttm_eu_reserve_buffers(&ticket, &head); + r = ttm_eu_reserve_buffers(&ticket, &head, true); if (r) return r; diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 0fbbbbd67afc..87d7deefc806 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -112,7 +112,7 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation); */ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, - struct list_head *list) + struct list_head *list, bool intr) { struct ttm_bo_global *glob; struct ttm_validate_buffer *entry; @@ -140,7 +140,7 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, if (entry->reserved) continue; - ret = __ttm_bo_reserve(bo, true, (ticket == NULL), true, + ret = __ttm_bo_reserve(bo, intr, (ticket == NULL), true, ticket); if (ret == -EDEADLK) { @@ -153,13 +153,17 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, ttm_eu_backoff_reservation_locked(list); spin_unlock(&glob->lru_lock); ttm_eu_list_ref_sub(list); - ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, - ticket); - if (unlikely(ret != 0)) { - if (ret == -EINTR) - ret = -ERESTARTSYS; - goto err_fini; - } + + if (intr) { + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, + ticket); + if (unlikely(ret != 0)) { + if (ret == -EINTR) + ret = -ERESTARTSYS; + goto err_fini; + } + } else + ww_mutex_lock_slow(&bo->resv->lock, ticket); entry->reserved = true; if (unlikely(atomic_read(&bo->cpu_writers) > 0)) { diff --git 
a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 7bfdaa163a33..24f067bf438d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2496,7 +2496,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, if (unlikely(ret != 0)) goto out_err_nores; - ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes); + ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes, true); if (unlikely(ret != 0)) goto out_err; @@ -2684,10 +2684,7 @@ void __vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv, query_val.bo = ttm_bo_reference(dev_priv->dummy_query_bo); list_add_tail(&query_val.head, &validate_list); - do { - ret = ttm_eu_reserve_buffers(&ticket, &validate_list); - } while (ret == -ERESTARTSYS); - + ret = ttm_eu_reserve_buffers(&ticket, &validate_list, false); if (unlikely(ret != 0)) { vmw_execbuf_unpin_panic(dev_priv); goto out_no_reserve; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 1ee86bf82750..23169362bca8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -1216,7 +1216,7 @@ vmw_resource_check_buffer(struct vmw_resource *res, INIT_LIST_HEAD(&val_list); val_buf->bo = ttm_bo_reference(&res->backup->base); list_add_tail(&val_buf->head, &val_list); - ret = ttm_eu_reserve_buffers(NULL, &val_list); + ret = ttm_eu_reserve_buffers(NULL, &val_list, interruptible); if (unlikely(ret != 0)) goto out_no_reserve; diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h index 16db7d01a336..fd95fd569ca3 100644 --- a/include/drm/ttm/ttm_execbuf_util.h +++ b/include/drm/ttm/ttm_execbuf_util.h @@ -73,6 +73,7 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, * @ticket: [out] ww_acquire_ctx filled in by call, or NULL if only * non-blocking reserves should be tried. 
* @list: thread private list of ttm_validate_buffer structs. + * @intr: should the wait be interruptible * * Tries to reserve bos pointed to by the list entries for validation. * If the function returns 0, all buffers are marked as "unfenced", @@ -84,9 +85,9 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, * CPU write reservations to be cleared, and for other threads to * unreserve their buffers. * - * This function may return -ERESTART or -EAGAIN if the calling process - * receives a signal while waiting. In that case, no buffers on the list - * will be reserved upon return. + * If intr is set to true, this function may return -ERESTARTSYS if the + * calling process receives a signal while waiting. In that case, no + * buffers on the list will be reserved upon return. * * Buffers reserved by this function should be unreserved by * a call to either ttm_eu_backoff_reservation() or @@ -95,7 +96,7 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, */ extern int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, - struct list_head *list); + struct list_head *list, bool intr); /** * function ttm_eu_fence_buffer_objects. -- GitLab From 1f0dc9a59afeccb96a35ebec36661266260f5eee Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 9 Jan 2014 11:03:08 +0100 Subject: [PATCH 0315/1868] drm/ttm: kill off some members to ttm_validate_buffer This reorders the list to keep track of what buffers are reserved, so previous members are always unreserved. This gets rid of some bookkeeping that's no longer needed, while simplifying the code some. 
Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/qxl/qxl_release.c | 1 - drivers/gpu/drm/ttm/ttm_execbuf_util.c | 142 +++++++++--------------- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 1 - include/drm/ttm/ttm_execbuf_util.h | 3 - 4 files changed, 50 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 656f9d3a946d..4045ba873ab8 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -349,7 +349,6 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) ttm_bo_add_to_lru(bo); __ttm_bo_unreserve(bo); - entry->reserved = false; } spin_unlock(&glob->lru_lock); ww_acquire_fini(&release->ticket); diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 87d7deefc806..108730e9147b 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -32,20 +32,12 @@ #include #include -static void ttm_eu_backoff_reservation_locked(struct list_head *list) +static void ttm_eu_backoff_reservation_reverse(struct list_head *list, + struct ttm_validate_buffer *entry) { - struct ttm_validate_buffer *entry; - - list_for_each_entry(entry, list, head) { + list_for_each_entry_continue_reverse(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - if (!entry->reserved) - continue; - entry->reserved = false; - if (entry->removed) { - ttm_bo_add_to_lru(bo); - entry->removed = false; - } __ttm_bo_unreserve(bo); } } @@ -56,27 +48,9 @@ static void ttm_eu_del_from_lru_locked(struct list_head *list) list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - if (!entry->reserved) - continue; + unsigned put_count = ttm_bo_del_from_lru(bo); - if (!entry->removed) { - entry->put_count = ttm_bo_del_from_lru(bo); - entry->removed = true; - } - } -} - -static void ttm_eu_list_ref_sub(struct list_head *list) -{ - struct ttm_validate_buffer *entry; - - list_for_each_entry(entry, 
list, head) { - struct ttm_buffer_object *bo = entry->bo; - - if (entry->put_count) { - ttm_bo_list_ref_sub(bo, entry->put_count, true); - entry->put_count = 0; - } + ttm_bo_list_ref_sub(bo, put_count, true); } } @@ -91,11 +65,18 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, entry = list_first_entry(list, struct ttm_validate_buffer, head); glob = entry->bo->glob; + spin_lock(&glob->lru_lock); - ttm_eu_backoff_reservation_locked(list); + list_for_each_entry(entry, list, head) { + struct ttm_buffer_object *bo = entry->bo; + + ttm_bo_add_to_lru(bo); + __ttm_bo_unreserve(bo); + } + spin_unlock(&glob->lru_lock); + if (ticket) ww_acquire_fini(ticket); - spin_unlock(&glob->lru_lock); } EXPORT_SYMBOL(ttm_eu_backoff_reservation); @@ -121,64 +102,55 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, if (list_empty(list)) return 0; - list_for_each_entry(entry, list, head) { - entry->reserved = false; - entry->put_count = 0; - entry->removed = false; - } - entry = list_first_entry(list, struct ttm_validate_buffer, head); glob = entry->bo->glob; if (ticket) ww_acquire_init(ticket, &reservation_ww_class); -retry: + list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - /* already slowpath reserved? */ - if (entry->reserved) - continue; - ret = __ttm_bo_reserve(bo, intr, (ticket == NULL), true, ticket); + if (!ret && unlikely(atomic_read(&bo->cpu_writers) > 0)) { + __ttm_bo_unreserve(bo); - if (ret == -EDEADLK) { - /* uh oh, we lost out, drop every reservation and try - * to only reserve this buffer, then start over if - * this succeeds. 
- */ - BUG_ON(ticket == NULL); - spin_lock(&glob->lru_lock); - ttm_eu_backoff_reservation_locked(list); - spin_unlock(&glob->lru_lock); - ttm_eu_list_ref_sub(list); - - if (intr) { - ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, - ticket); - if (unlikely(ret != 0)) { - if (ret == -EINTR) - ret = -ERESTARTSYS; - goto err_fini; - } - } else - ww_mutex_lock_slow(&bo->resv->lock, ticket); - - entry->reserved = true; - if (unlikely(atomic_read(&bo->cpu_writers) > 0)) { - ret = -EBUSY; - goto err; - } - goto retry; - } else if (ret) - goto err; - - entry->reserved = true; - if (unlikely(atomic_read(&bo->cpu_writers) > 0)) { ret = -EBUSY; - goto err; } + + if (!ret) + continue; + + /* uh oh, we lost out, drop every reservation and try + * to only reserve this buffer, then start over if + * this succeeds. + */ + ttm_eu_backoff_reservation_reverse(list, entry); + + if (ret == -EDEADLK && intr) { + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, + ticket); + } else if (ret == -EDEADLK) { + ww_mutex_lock_slow(&bo->resv->lock, ticket); + ret = 0; + } + + if (unlikely(ret != 0)) { + if (ret == -EINTR) + ret = -ERESTARTSYS; + if (ticket) { + ww_acquire_done(ticket); + ww_acquire_fini(ticket); + } + return ret; + } + + /* move this item to the front of the list, + * forces correct iteration of the loop without keeping track + */ + list_del(&entry->head); + list_add(&entry->head, list); } if (ticket) @@ -186,20 +158,7 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, spin_lock(&glob->lru_lock); ttm_eu_del_from_lru_locked(list); spin_unlock(&glob->lru_lock); - ttm_eu_list_ref_sub(list); return 0; - -err: - spin_lock(&glob->lru_lock); - ttm_eu_backoff_reservation_locked(list); - spin_unlock(&glob->lru_lock); - ttm_eu_list_ref_sub(list); -err_fini: - if (ticket) { - ww_acquire_done(ticket); - ww_acquire_fini(ticket); - } - return ret; } EXPORT_SYMBOL(ttm_eu_reserve_buffers); @@ -228,7 +187,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx 
*ticket, bo->sync_obj = driver->sync_obj_ref(sync_obj); ttm_bo_add_to_lru(bo); __ttm_bo_unreserve(bo); - entry->reserved = false; } spin_unlock(&glob->lru_lock); if (ticket) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 24f067bf438d..b19b2b980cb4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -346,7 +346,6 @@ static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context, ++sw_context->cur_val_buf; val_buf = &vval_buf->base; val_buf->bo = ttm_bo_reference(bo); - val_buf->reserved = false; list_add_tail(&val_buf->head, &sw_context->validate_nodes); vval_buf->validate_as_mob = validate_as_mob; } diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h index fd95fd569ca3..8490cb8ee0d8 100644 --- a/include/drm/ttm/ttm_execbuf_util.h +++ b/include/drm/ttm/ttm_execbuf_util.h @@ -48,9 +48,6 @@ struct ttm_validate_buffer { struct list_head head; struct ttm_buffer_object *bo; - bool reserved; - bool removed; - int put_count; void *old_sync_obj; }; -- GitLab From 954605ca3f897ad617123279eb3404a404cce5ab Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 9 Jan 2014 11:03:12 +0100 Subject: [PATCH 0316/1868] drm/radeon: use common fence implementation for fences, v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes since v1: - Kill the sw interrupt dance, add and use radeon_irq_kms_sw_irq_get_delayed instead. - Change custom wait function, lockdep complained about it. Holding exclusive_lock in the wait function might cause deadlocks. Instead do all the processing in .enable_signaling, and wait on the global fence_queue to pick up gpu resets. - Process all fences in radeon_gpu_reset after reset to close a race with the trylock in enable_signaling. Changes since v2: - Small changes to work with the rewritten lockup recovery patches. 
Changes since v3: - Call radeon_fence_schedule_check when exclusive_lock cannot be acquired to always cause a wake up. - Reset irqs from hangup check. - Drop reading seqno in the callback, use cached value. - Fix indentation in radeon_fence_default_wait - Add a radeon_test_signaled function, drop a few test_bit calls. - Make to_radeon_fence global. Signed-off-by: Maarten Lankhorst Reviewed-by: Christian König --- drivers/gpu/drm/radeon/radeon.h | 23 ++- drivers/gpu/drm/radeon/radeon_device.c | 1 + drivers/gpu/drm/radeon/radeon_fence.c | 225 +++++++++++++++++++++--- drivers/gpu/drm/radeon/radeon_irq_kms.c | 15 ++ 4 files changed, 235 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 83a24614138a..d80dc547a105 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -66,6 +66,7 @@ #include #include #include +#include #include #include @@ -354,17 +355,19 @@ struct radeon_fence_driver { /* sync_seq is protected by ring emission lock */ uint64_t sync_seq[RADEON_NUM_RINGS]; atomic64_t last_seq; - bool initialized; + bool initialized, delayed_irq; struct delayed_work lockup_work; }; struct radeon_fence { + struct fence base; + struct radeon_device *rdev; - struct kref kref; - /* protected by radeon_fence.lock */ uint64_t seq; /* RB, DMA, etc. 
*/ unsigned ring; + + wait_queue_t fence_wake; }; int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); @@ -782,6 +785,7 @@ struct radeon_irq { int radeon_irq_kms_init(struct radeon_device *rdev); void radeon_irq_kms_fini(struct radeon_device *rdev); void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); +bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); @@ -2308,6 +2312,7 @@ struct radeon_device { struct radeon_mman mman; struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS]; wait_queue_head_t fence_queue; + unsigned fence_context; struct mutex ring_lock; struct radeon_ring ring[RADEON_NUM_RINGS]; bool ib_pool_ready; @@ -2441,7 +2446,17 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v); /* * Cast helper */ -#define to_radeon_fence(p) ((struct radeon_fence *)(p)) +extern const struct fence_ops radeon_fence_ops; + +static inline struct radeon_fence *to_radeon_fence(struct fence *f) +{ + struct radeon_fence *__f = container_of(f, struct radeon_fence, base); + + if (__f->base.ops == &radeon_fence_ops) + return __f; + + return NULL; +} /* * Registers read & write functions. 
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index d30f1cc1aa12..e84a76e6656a 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1253,6 +1253,7 @@ int radeon_device_init(struct radeon_device *rdev, for (i = 0; i < RADEON_NUM_RINGS; i++) { rdev->ring[i].idx = i; } + rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS); DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n", radeon_family_name[rdev->family], pdev->vendor, pdev->device, diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index ecdba3afa2c3..af9f2d6bd7d0 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -130,21 +130,59 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring) { + u64 seq = ++rdev->fence_drv[ring].sync_seq[ring]; + /* we are protected by the ring emission mutex */ *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); if ((*fence) == NULL) { return -ENOMEM; } - kref_init(&((*fence)->kref)); (*fence)->rdev = rdev; - (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring]; + (*fence)->seq = seq; (*fence)->ring = ring; + fence_init(&(*fence)->base, &radeon_fence_ops, + &rdev->fence_queue.lock, rdev->fence_context + ring, seq); radeon_fence_ring_emit(rdev, ring, *fence); trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); radeon_fence_schedule_check(rdev, ring); return 0; } +/** + * radeon_fence_check_signaled - callback from fence_queue + * + * this function is called with fence_queue lock held, which is also used + * for the fence locking itself, so unlocked variants are used for + * fence_signal, and remove_wait_queue. 
+ */ +static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key) +{ + struct radeon_fence *fence; + u64 seq; + + fence = container_of(wait, struct radeon_fence, fence_wake); + + /* + * We cannot use radeon_fence_process here because we're already + * in the waitqueue, in a call from wake_up_all. + */ + seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq); + if (seq >= fence->seq) { + int ret = fence_signal_locked(&fence->base); + + if (!ret) + FENCE_TRACE(&fence->base, "signaled from irq context\n"); + else + FENCE_TRACE(&fence->base, "was already signaled\n"); + + radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring); + __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake); + fence_put(&fence->base); + } else + FENCE_TRACE(&fence->base, "pending\n"); + return 0; +} + /** * radeon_fence_activity - check for fence activity * @@ -242,6 +280,15 @@ static void radeon_fence_check_lockup(struct work_struct *work) return; } + if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) { + unsigned long irqflags; + + fence_drv->delayed_irq = false; + spin_lock_irqsave(&rdev->irq.lock, irqflags); + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + } + if (radeon_fence_activity(rdev, ring)) wake_up_all(&rdev->fence_queue); @@ -275,21 +322,6 @@ void radeon_fence_process(struct radeon_device *rdev, int ring) wake_up_all(&rdev->fence_queue); } -/** - * radeon_fence_destroy - destroy a fence - * - * @kref: fence kref - * - * Frees the fence object (all asics). 
- */ -static void radeon_fence_destroy(struct kref *kref) -{ - struct radeon_fence *fence; - - fence = container_of(kref, struct radeon_fence, kref); - kfree(fence); -} - /** * radeon_fence_seq_signaled - check if a fence sequence number has signaled * @@ -318,6 +350,75 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev, return false; } +static bool radeon_fence_is_signaled(struct fence *f) +{ + struct radeon_fence *fence = to_radeon_fence(f); + struct radeon_device *rdev = fence->rdev; + unsigned ring = fence->ring; + u64 seq = fence->seq; + + if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) { + return true; + } + + if (down_read_trylock(&rdev->exclusive_lock)) { + radeon_fence_process(rdev, ring); + up_read(&rdev->exclusive_lock); + + if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) { + return true; + } + } + return false; +} + +/** + * radeon_fence_enable_signaling - enable signalling on fence + * @fence: fence + * + * This function is called with fence_queue lock held, and adds a callback + * to fence_queue that checks if this fence is signaled, and if so it + * signals the fence and removes itself. + */ +static bool radeon_fence_enable_signaling(struct fence *f) +{ + struct radeon_fence *fence = to_radeon_fence(f); + struct radeon_device *rdev = fence->rdev; + + if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) + return false; + + if (down_read_trylock(&rdev->exclusive_lock)) { + radeon_irq_kms_sw_irq_get(rdev, fence->ring); + + if (radeon_fence_activity(rdev, fence->ring)) + wake_up_all_locked(&rdev->fence_queue); + + /* did fence get signaled after we enabled the sw irq? 
*/ + if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) { + radeon_irq_kms_sw_irq_put(rdev, fence->ring); + up_read(&rdev->exclusive_lock); + return false; + } + + up_read(&rdev->exclusive_lock); + } else { + /* we're probably in a lockup, lets not fiddle too much */ + if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring)) + rdev->fence_drv[fence->ring].delayed_irq = true; + radeon_fence_schedule_check(rdev, fence->ring); + } + + fence->fence_wake.flags = 0; + fence->fence_wake.private = NULL; + fence->fence_wake.func = radeon_fence_check_signaled; + __add_wait_queue(&rdev->fence_queue, &fence->fence_wake); + fence_get(f); + + FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring); + return true; +} + /** * radeon_fence_signaled - check if a fence has signaled * @@ -330,8 +431,15 @@ bool radeon_fence_signaled(struct radeon_fence *fence) { if (!fence) return true; - if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) + + if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) { + int ret; + + ret = fence_signal(&fence->base); + if (!ret) + FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n"); return true; + } return false; } @@ -433,17 +541,15 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) uint64_t seq[RADEON_NUM_RINGS] = {}; long r; - if (fence == NULL) { - WARN(1, "Querying an invalid fence : %p !\n", fence); - return -EINVAL; - } - seq[fence->ring] = fence->seq; r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); if (r < 0) { return r; } + r = fence_signal(&fence->base); + if (!r) + FENCE_TRACE(&fence->base, "signaled from fence_wait\n"); return 0; } @@ -557,7 +663,7 @@ int radeon_fence_wait_empty(struct radeon_device *rdev, int ring) */ struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence) { - kref_get(&fence->kref); + fence_get(&fence->base); return fence; } @@ -574,7 +680,7 @@ void radeon_fence_unref(struct radeon_fence **fence) 
*fence = NULL; if (tmp) { - kref_put(&tmp->kref, radeon_fence_destroy); + fence_put(&tmp->base); } } @@ -887,3 +993,72 @@ int radeon_debugfs_fence_init(struct radeon_device *rdev) return 0; #endif } + +static const char *radeon_fence_get_driver_name(struct fence *fence) +{ + return "radeon"; +} + +static const char *radeon_fence_get_timeline_name(struct fence *f) +{ + struct radeon_fence *fence = to_radeon_fence(f); + switch (fence->ring) { + case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx"; + case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1"; + case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2"; + case R600_RING_TYPE_DMA_INDEX: return "radeon.dma"; + case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1"; + case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd"; + case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1"; + case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2"; + default: WARN_ON_ONCE(1); return "radeon.unk"; + } +} + +static inline bool radeon_test_signaled(struct radeon_fence *fence) +{ + return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); +} + +static signed long radeon_fence_default_wait(struct fence *f, bool intr, + signed long t) +{ + struct radeon_fence *fence = to_radeon_fence(f); + struct radeon_device *rdev = fence->rdev; + bool signaled; + + fence_enable_sw_signaling(&fence->base); + + /* + * This function has to return -EDEADLK, but cannot hold + * exclusive_lock during the wait because some callers + * may already hold it. This means checking needs_reset without + * lock, and not fiddling with any gpu internals. + * + * The callback installed with fence_enable_sw_signaling will + * run before our wait_event_*timeout call, so we will see + * both the signaled fence and the changes to needs_reset. 
+ */ + + if (intr) + t = wait_event_interruptible_timeout(rdev->fence_queue, + ((signaled = radeon_test_signaled(fence)) || + rdev->needs_reset), t); + else + t = wait_event_timeout(rdev->fence_queue, + ((signaled = radeon_test_signaled(fence)) || + rdev->needs_reset), t); + + if (t > 0 && !signaled) + return -EDEADLK; + return t; +} + +const struct fence_ops radeon_fence_ops = { + .get_driver_name = radeon_fence_get_driver_name, + .get_timeline_name = radeon_fence_get_timeline_name, + .enable_signaling = radeon_fence_enable_signaling, + .signaled = radeon_fence_is_signaled, + .wait = radeon_fence_default_wait, + .release = NULL, +}; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index f0bff4be67f1..7784911d78ef 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -323,6 +323,21 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) } } +/** + * radeon_irq_kms_sw_irq_get_delayed - enable software interrupt + * + * @rdev: radeon device pointer + * @ring: ring whose interrupt you want to enable + * + * Enables the software interrupt for a specific ring (all asics). + * The software interrupt is generally used to signal a fence on + * a particular ring. + */ +bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring) +{ + return atomic_inc_return(&rdev->irq.ring_int[ring]) == 1; +} + /** * radeon_irq_kms_sw_irq_put - disable software interrupt * -- GitLab From 87e392164ab6c0f3f055f8483dc7dc3f1afa19ad Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Wed, 27 Aug 2014 15:36:37 -0700 Subject: [PATCH 0317/1868] clk: mvebu: share locks between gate clocks Refactor mvebu_clk_gating_setup() to use a common spinlock instead of a unique lock for every instance of a struct clk_gating_ctrl object. 
This will be used later for a separate mux clock type that shares a register with gate clock types and needs to use the same lock to protect access to the register. Cc: Andrew Lunn Tested-by: Andrew Lunn Signed-off-by: Mike Turquette --- drivers/clk/mvebu/common.c | 9 ++++++--- drivers/clk/mvebu/common.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/clk/mvebu/common.c b/drivers/clk/mvebu/common.c index 25ceccf939ad..8145c4efc381 100644 --- a/drivers/clk/mvebu/common.c +++ b/drivers/clk/mvebu/common.c @@ -89,8 +89,10 @@ void __init mvebu_coreclk_setup(struct device_node *np, * Clock Gating Control */ +DEFINE_SPINLOCK(ctrl_gating_lock); + struct clk_gating_ctrl { - spinlock_t lock; + spinlock_t *lock; struct clk **gates; int num_gates; }; @@ -138,7 +140,8 @@ void __init mvebu_clk_gating_setup(struct device_node *np, if (WARN_ON(!ctrl)) goto ctrl_out; - spin_lock_init(&ctrl->lock); + /* lock must already be initialized */ + ctrl->lock = &ctrl_gating_lock; /* Count, allocate, and register clock gates */ for (n = 0; desc[n].name;) @@ -155,7 +158,7 @@ void __init mvebu_clk_gating_setup(struct device_node *np, (desc[n].parent) ? desc[n].parent : default_parent; ctrl->gates[n] = clk_register_gate(NULL, desc[n].name, parent, desc[n].flags, base, desc[n].bit_idx, - 0, &ctrl->lock); + 0, ctrl->lock); WARN_ON(IS_ERR(ctrl->gates[n])); } diff --git a/drivers/clk/mvebu/common.h b/drivers/clk/mvebu/common.h index f968b4d9df92..8cd28e47471c 100644 --- a/drivers/clk/mvebu/common.h +++ b/drivers/clk/mvebu/common.h @@ -17,6 +17,8 @@ #include +extern spinlock_t ctrl_gating_lock; + struct device_node; struct coreclk_ratio { -- GitLab From e8e8a9b0d86c093b208789fd71501c91a919ffdb Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Wed, 27 Aug 2014 17:11:38 -0700 Subject: [PATCH 0318/1868] clk: mvebu: powersave clock is a multiplexer Kirkwood is unique among the mvebu SoCs for having a clock multiplexer that feeds into the cpu. 
This multiplexer can select either the cpu pll or the ddr clock as its input signal, allowing for a choice between performance and power savings. This patch introduces the code needed to register the clock multiplexer on Kirkwood SoCs but does not include the clock data to actually register the clock. That will be done in a follow-up patch which is necessary to prevent breaking git bisect. Cc: Tomeu Vizoso Tested-by: Andrew Lunn Signed-off-by: Mike Turquette --- drivers/clk/mvebu/kirkwood.c | 87 ++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/drivers/clk/mvebu/kirkwood.c b/drivers/clk/mvebu/kirkwood.c index ddb666a86500..f73a2fa8a561 100644 --- a/drivers/clk/mvebu/kirkwood.c +++ b/drivers/clk/mvebu/kirkwood.c @@ -13,9 +13,11 @@ */ #include +#include #include #include #include +#include #include "common.h" /* @@ -225,6 +227,91 @@ static const struct clk_gating_soc_desc kirkwood_gating_desc[] __initconst = { { } }; + +/* + * Clock Muxing Control + */ + +struct clk_muxing_soc_desc { + const char *name; + const char **parents; + int num_parents; + int shift; + int width; + unsigned long flags; +}; + +struct clk_muxing_ctrl { + spinlock_t *lock; + struct clk **muxes; + int num_muxes; +}; + +#define to_clk_mux(_hw) container_of(_hw, struct clk_mux, hw) + +static struct clk *clk_muxing_get_src( + struct of_phandle_args *clkspec, void *data) +{ + struct clk_muxing_ctrl *ctrl = (struct clk_muxing_ctrl *)data; + int n; + + if (clkspec->args_count < 1) + return ERR_PTR(-EINVAL); + + for (n = 0; n < ctrl->num_muxes; n++) { + struct clk_mux *mux = + to_clk_mux(__clk_get_hw(ctrl->muxes[n])); + if (clkspec->args[0] == mux->shift) + return ctrl->muxes[n]; + } + return ERR_PTR(-ENODEV); +} + +static void __init kirkwood_clk_muxing_setup(struct device_node *np, + const struct clk_muxing_soc_desc *desc) +{ + struct clk_muxing_ctrl *ctrl; + void __iomem *base; + int n; + + base = of_iomap(np, 0); + if (WARN_ON(!base)) + return; + + ctrl = 
kzalloc(sizeof(*ctrl), GFP_KERNEL); + if (WARN_ON(!ctrl)) + goto ctrl_out; + + /* lock must already be initialized */ + ctrl->lock = &ctrl_gating_lock; + + /* Count, allocate, and register clock muxes */ + for (n = 0; desc[n].name;) + n++; + + ctrl->num_muxes = n; + ctrl->muxes = kcalloc(ctrl->num_muxes, sizeof(struct clk *), + GFP_KERNEL); + if (WARN_ON(!ctrl->muxes)) + goto muxes_out; + + for (n = 0; n < ctrl->num_muxes; n++) { + ctrl->muxes[n] = clk_register_mux(NULL, desc[n].name, + desc[n].parents, desc[n].num_parents, + desc[n].flags, base, desc[n].shift, + desc[n].width, desc[n].flags, ctrl->lock); + WARN_ON(IS_ERR(ctrl->muxes[n])); + } + + of_clk_add_provider(np, clk_muxing_get_src, ctrl); + + return; +muxes_out: + kfree(ctrl); +ctrl_out: + iounmap(base); +} + static void __init kirkwood_clk_init(struct device_node *np) { struct device_node *cgnp = -- GitLab From b6c044a3d881c1b01fe0220ad76e548ea5d79b1c Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 3 Aug 2014 10:36:59 +0800 Subject: [PATCH 0319/1868] gpu: ipu-v3: Select GENERIC_IRQ_CHIP to fix build error This driver uses GENERIC_IRQ_CHIP, so it needs to select GENERIC_IRQ_CHIP to avoid build error. Fixes below build errors: ERROR: "irq_alloc_domain_generic_chips" [drivers/gpu/ipu-v3/imx-ipu-v3.ko] undefined! ERROR: "irq_gc_mask_clr_bit" [drivers/gpu/ipu-v3/imx-ipu-v3.ko] undefined! ERROR: "irq_gc_mask_set_bit" [drivers/gpu/ipu-v3/imx-ipu-v3.ko] undefined! ERROR: "irq_generic_chip_ops" [drivers/gpu/ipu-v3/imx-ipu-v3.ko] undefined! ERROR: "irq_gc_ack_set_bit" [drivers/gpu/ipu-v3/imx-ipu-v3.ko] undefined! ERROR: "irq_get_domain_generic_chip" [drivers/gpu/ipu-v3/imx-ipu-v3.ko] undefined! 
make[1]: *** [__modpost] Error 1 make: *** [modules] Error 2 Signed-off-by: Axel Lin Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/ipu-v3/Kconfig b/drivers/gpu/ipu-v3/Kconfig index 2f228a2f2a48..01864a55264a 100644 --- a/drivers/gpu/ipu-v3/Kconfig +++ b/drivers/gpu/ipu-v3/Kconfig @@ -2,6 +2,7 @@ config IMX_IPUV3_CORE tristate "IPUv3 core support" depends on SOC_IMX5 || SOC_IMX6Q || SOC_IMX6SL || ARCH_MULTIPLATFORM depends on RESET_CONTROLLER + select GENERIC_IRQ_CHIP help Choose this if you have a i.MX5/6 system and want to use the Image Processing Unit. This option only enables IPU base support. -- GitLab From e4946cdcabcffd4814e153e71d28884b94c65e9e Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 3 Aug 2014 10:38:18 +0800 Subject: [PATCH 0320/1868] gpu: ipu-v3: Return proper error on ipu_add_client_devices error path Avoid returning an uninitialized variable in the error path. Signed-off-by: Axel Lin Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 04e7b2eafbdd..e340bebd3419 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -1116,8 +1116,10 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base) id++, &reg->pdata, sizeof(reg->pdata)); } - if (IS_ERR(pdev)) + if (IS_ERR(pdev)) { + ret = PTR_ERR(pdev); goto err_register; + } } return 0; -- GitLab From e68885e24ad1a2d7d4ad6df04cbc9b623bd1d0b9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 2 Sep 2014 00:37:13 -0300 Subject: [PATCH 0321/1868] gpu: ipu-v3: ipu-smfc: Do not leave DEBUG defined Let's only define DEBUG for debugging purpose and not by default to avoid printing debugging message unnecessarily.
Signed-off-by: Fabio Estevam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-smfc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/ipu-v3/ipu-smfc.c b/drivers/gpu/ipu-v3/ipu-smfc.c index e4f85ad286fc..4939c5011d4d 100644 --- a/drivers/gpu/ipu-v3/ipu-smfc.c +++ b/drivers/gpu/ipu-v3/ipu-smfc.c @@ -8,7 +8,6 @@ * http://www.opensource.org/licenses/gpl-license.html * http://www.gnu.org/copyleft/gpl.html */ -#define DEBUG #include #include #include -- GitLab From 2ffd48f2e7ae06c3d7b2bcde9a0cb211d1a32468 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 19 Aug 2014 10:52:40 -0700 Subject: [PATCH 0322/1868] gpu: ipu-v3: Add Camera Sensor Interface unit Adds the Camera Sensor Interface (CSI) unit required for video capture. Signed-off-by: Steve Longerbeam Removed the unused clk_get_rate in ipu_csi_init_interface and the ipu_csi_ccir_err_detection_enable/disable functions. Checkpatch cleanup. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/Makefile | 2 +- drivers/gpu/ipu-v3/ipu-common.c | 44 +- drivers/gpu/ipu-v3/ipu-csi.c | 741 ++++++++++++++++++++++++++++++++ drivers/gpu/ipu-v3/ipu-prv.h | 6 + include/video/imx-ipu-v3.h | 32 +- 5 files changed, 810 insertions(+), 15 deletions(-) create mode 100644 drivers/gpu/ipu-v3/ipu-csi.c diff --git a/drivers/gpu/ipu-v3/Makefile b/drivers/gpu/ipu-v3/Makefile index 0b42836caae1..d22bd06caa6d 100644 --- a/drivers/gpu/ipu-v3/Makefile +++ b/drivers/gpu/ipu-v3/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_IMX_IPUV3_CORE) += imx-ipu-v3.o -imx-ipu-v3-objs := ipu-common.o ipu-cpmem.o ipu-dc.o ipu-di.o \ +imx-ipu-v3-objs := ipu-common.o ipu-cpmem.o ipu-csi.o ipu-dc.o ipu-di.o \ ipu-dp.o ipu-dmfc.o ipu-smfc.o diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index cae543115856..511c364231a2 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -217,18 +217,6 @@ int ipu_module_disable(struct ipu_soc *ipu, u32 mask) } EXPORT_SYMBOL_GPL(ipu_module_disable); -int 
ipu_csi_enable(struct ipu_soc *ipu, int csi) -{ - return ipu_module_enable(ipu, csi ? IPU_CONF_CSI1_EN : IPU_CONF_CSI0_EN); -} -EXPORT_SYMBOL_GPL(ipu_csi_enable); - -int ipu_csi_disable(struct ipu_soc *ipu, int csi) -{ - return ipu_module_disable(ipu, csi ? IPU_CONF_CSI1_EN : IPU_CONF_CSI0_EN); -} -EXPORT_SYMBOL_GPL(ipu_csi_disable); - int ipu_smfc_enable(struct ipu_soc *ipu) { return ipu_module_enable(ipu, IPU_CONF_SMFC_EN); @@ -439,6 +427,8 @@ struct ipu_devtype { unsigned long cpmem_ofs; unsigned long srm_ofs; unsigned long tpm_ofs; + unsigned long csi0_ofs; + unsigned long csi1_ofs; unsigned long disp0_ofs; unsigned long disp1_ofs; unsigned long dc_tmpl_ofs; @@ -452,6 +442,8 @@ static struct ipu_devtype ipu_type_imx51 = { .cpmem_ofs = 0x1f000000, .srm_ofs = 0x1f040000, .tpm_ofs = 0x1f060000, + .csi0_ofs = 0x1f030000, + .csi1_ofs = 0x1f038000, .disp0_ofs = 0x1e040000, .disp1_ofs = 0x1e048000, .dc_tmpl_ofs = 0x1f080000, @@ -465,6 +457,8 @@ static struct ipu_devtype ipu_type_imx53 = { .cpmem_ofs = 0x07000000, .srm_ofs = 0x07040000, .tpm_ofs = 0x07060000, + .csi0_ofs = 0x07030000, + .csi1_ofs = 0x07038000, .disp0_ofs = 0x06040000, .disp1_ofs = 0x06048000, .dc_tmpl_ofs = 0x07080000, @@ -478,6 +472,8 @@ static struct ipu_devtype ipu_type_imx6q = { .cpmem_ofs = 0x00300000, .srm_ofs = 0x00340000, .tpm_ofs = 0x00360000, + .csi0_ofs = 0x00230000, + .csi1_ofs = 0x00238000, .disp0_ofs = 0x00240000, .disp1_ofs = 0x00248000, .dc_tmpl_ofs = 0x00380000, @@ -508,6 +504,20 @@ static int ipu_submodules_init(struct ipu_soc *ipu, goto err_cpmem; } + ret = ipu_csi_init(ipu, dev, 0, ipu_base + devtype->csi0_ofs, + IPU_CONF_CSI0_EN, ipu_clk); + if (ret) { + unit = "csi0"; + goto err_csi_0; + } + + ret = ipu_csi_init(ipu, dev, 1, ipu_base + devtype->csi1_ofs, + IPU_CONF_CSI1_EN, ipu_clk); + if (ret) { + unit = "csi1"; + goto err_csi_1; + } + ret = ipu_di_init(ipu, dev, 0, ipu_base + devtype->disp0_ofs, IPU_CONF_DI0_EN, ipu_clk); if (ret) { @@ -562,6 +572,10 @@ static int 
ipu_submodules_init(struct ipu_soc *ipu, err_di_1: ipu_di_exit(ipu, 0); err_di_0: + ipu_csi_exit(ipu, 1); +err_csi_1: + ipu_csi_exit(ipu, 0); +err_csi_0: ipu_cpmem_exit(ipu); err_cpmem: dev_err(&pdev->dev, "init %s failed with %d\n", unit, ret); @@ -640,6 +654,8 @@ static void ipu_submodules_exit(struct ipu_soc *ipu) ipu_dc_exit(ipu); ipu_di_exit(ipu, 1); ipu_di_exit(ipu, 0); + ipu_csi_exit(ipu, 1); + ipu_csi_exit(ipu, 0); ipu_cpmem_exit(ipu); } @@ -859,6 +875,10 @@ static int ipu_probe(struct platform_device *pdev) ipu_base + devtype->cm_ofs + IPU_CM_IDMAC_REG_OFS); dev_dbg(&pdev->dev, "cpmem: 0x%08lx\n", ipu_base + devtype->cpmem_ofs); + dev_dbg(&pdev->dev, "csi0: 0x%08lx\n", + ipu_base + devtype->csi0_ofs); + dev_dbg(&pdev->dev, "csi1: 0x%08lx\n", + ipu_base + devtype->csi1_ofs); dev_dbg(&pdev->dev, "disp0: 0x%08lx\n", ipu_base + devtype->disp0_ofs); dev_dbg(&pdev->dev, "disp1: 0x%08lx\n", diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c new file mode 100644 index 000000000000..d6f56471bd2a --- /dev/null +++ b/drivers/gpu/ipu-v3/ipu-csi.c @@ -0,0 +1,741 @@ +/* + * Copyright (C) 2012-2014 Mentor Graphics Inc. + * Copyright (C) 2005-2009 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ipu-prv.h" + +struct ipu_csi { + void __iomem *base; + int id; + u32 module; + struct clk *clk_ipu; /* IPU bus clock */ + spinlock_t lock; + bool inuse; + struct ipu_soc *ipu; +}; + +/* CSI Register Offsets */ +#define CSI_SENS_CONF 0x0000 +#define CSI_SENS_FRM_SIZE 0x0004 +#define CSI_ACT_FRM_SIZE 0x0008 +#define CSI_OUT_FRM_CTRL 0x000c +#define CSI_TST_CTRL 0x0010 +#define CSI_CCIR_CODE_1 0x0014 +#define CSI_CCIR_CODE_2 0x0018 +#define CSI_CCIR_CODE_3 0x001c +#define CSI_MIPI_DI 0x0020 +#define CSI_SKIP 0x0024 +#define CSI_CPD_CTRL 0x0028 +#define CSI_CPD_RC(n) (0x002c + ((n)*4)) +#define CSI_CPD_RS(n) (0x004c + ((n)*4)) +#define CSI_CPD_GRC(n) (0x005c + ((n)*4)) +#define CSI_CPD_GRS(n) (0x007c + ((n)*4)) +#define CSI_CPD_GBC(n) (0x008c + ((n)*4)) +#define CSI_CPD_GBS(n) (0x00Ac + ((n)*4)) +#define CSI_CPD_BC(n) (0x00Bc + ((n)*4)) +#define CSI_CPD_BS(n) (0x00Dc + ((n)*4)) +#define CSI_CPD_OFFSET1 0x00ec +#define CSI_CPD_OFFSET2 0x00f0 + +/* CSI Register Fields */ +#define CSI_SENS_CONF_DATA_FMT_SHIFT 8 +#define CSI_SENS_CONF_DATA_FMT_MASK 0x00000700 +#define CSI_SENS_CONF_DATA_FMT_RGB_YUV444 0L +#define CSI_SENS_CONF_DATA_FMT_YUV422_YUYV 1L +#define CSI_SENS_CONF_DATA_FMT_YUV422_UYVY 2L +#define CSI_SENS_CONF_DATA_FMT_BAYER 3L +#define CSI_SENS_CONF_DATA_FMT_RGB565 4L +#define CSI_SENS_CONF_DATA_FMT_RGB555 5L +#define CSI_SENS_CONF_DATA_FMT_RGB444 6L +#define CSI_SENS_CONF_DATA_FMT_JPEG 7L + +#define CSI_SENS_CONF_VSYNC_POL_SHIFT 0 +#define CSI_SENS_CONF_HSYNC_POL_SHIFT 1 +#define CSI_SENS_CONF_DATA_POL_SHIFT 2 +#define CSI_SENS_CONF_PIX_CLK_POL_SHIFT 3 +#define CSI_SENS_CONF_SENS_PRTCL_MASK 0x00000070 +#define CSI_SENS_CONF_SENS_PRTCL_SHIFT 4 +#define CSI_SENS_CONF_PACK_TIGHT_SHIFT 7 +#define CSI_SENS_CONF_DATA_WIDTH_SHIFT 11 +#define CSI_SENS_CONF_EXT_VSYNC_SHIFT 15 +#define CSI_SENS_CONF_DIVRATIO_SHIFT 16 + 
+#define CSI_SENS_CONF_DIVRATIO_MASK 0x00ff0000 +#define CSI_SENS_CONF_DATA_DEST_SHIFT 24 +#define CSI_SENS_CONF_DATA_DEST_MASK 0x07000000 +#define CSI_SENS_CONF_JPEG8_EN_SHIFT 27 +#define CSI_SENS_CONF_JPEG_EN_SHIFT 28 +#define CSI_SENS_CONF_FORCE_EOF_SHIFT 29 +#define CSI_SENS_CONF_DATA_EN_POL_SHIFT 31 + +#define CSI_DATA_DEST_IC 2 +#define CSI_DATA_DEST_IDMAC 4 + +#define CSI_CCIR_ERR_DET_EN 0x01000000 +#define CSI_HORI_DOWNSIZE_EN 0x80000000 +#define CSI_VERT_DOWNSIZE_EN 0x40000000 +#define CSI_TEST_GEN_MODE_EN 0x01000000 + +#define CSI_HSC_MASK 0x1fff0000 +#define CSI_HSC_SHIFT 16 +#define CSI_VSC_MASK 0x00000fff +#define CSI_VSC_SHIFT 0 + +#define CSI_TEST_GEN_R_MASK 0x000000ff +#define CSI_TEST_GEN_R_SHIFT 0 +#define CSI_TEST_GEN_G_MASK 0x0000ff00 +#define CSI_TEST_GEN_G_SHIFT 8 +#define CSI_TEST_GEN_B_MASK 0x00ff0000 +#define CSI_TEST_GEN_B_SHIFT 16 + +#define CSI_MAX_RATIO_SKIP_SMFC_MASK 0x00000007 +#define CSI_MAX_RATIO_SKIP_SMFC_SHIFT 0 +#define CSI_SKIP_SMFC_MASK 0x000000f8 +#define CSI_SKIP_SMFC_SHIFT 3 +#define CSI_ID_2_SKIP_MASK 0x00000300 +#define CSI_ID_2_SKIP_SHIFT 8 + +#define CSI_COLOR_FIRST_ROW_MASK 0x00000002 +#define CSI_COLOR_FIRST_COMP_MASK 0x00000001 + +/* MIPI CSI-2 data types */ +#define MIPI_DT_YUV420 0x18 /* YYY.../UYVY.... */ +#define MIPI_DT_YUV420_LEGACY 0x1a /* UYY.../VYY... */ +#define MIPI_DT_YUV422 0x1e /* UYVY... */ +#define MIPI_DT_RGB444 0x20 +#define MIPI_DT_RGB555 0x21 +#define MIPI_DT_RGB565 0x22 +#define MIPI_DT_RGB666 0x23 +#define MIPI_DT_RGB888 0x24 +#define MIPI_DT_RAW6 0x28 +#define MIPI_DT_RAW7 0x29 +#define MIPI_DT_RAW8 0x2a +#define MIPI_DT_RAW10 0x2b +#define MIPI_DT_RAW12 0x2c +#define MIPI_DT_RAW14 0x2d + +/* + * Bitfield of CSI bus signal polarities and modes. 
+ */ +struct ipu_csi_bus_config { + unsigned data_width:4; + unsigned clk_mode:3; + unsigned ext_vsync:1; + unsigned vsync_pol:1; + unsigned hsync_pol:1; + unsigned pixclk_pol:1; + unsigned data_pol:1; + unsigned sens_clksrc:1; + unsigned pack_tight:1; + unsigned force_eof:1; + unsigned data_en_pol:1; + + unsigned data_fmt; + unsigned mipi_dt; +}; + +/* + * Enumeration of CSI data bus widths. + */ +enum ipu_csi_data_width { + IPU_CSI_DATA_WIDTH_4 = 0, + IPU_CSI_DATA_WIDTH_8 = 1, + IPU_CSI_DATA_WIDTH_10 = 3, + IPU_CSI_DATA_WIDTH_12 = 5, + IPU_CSI_DATA_WIDTH_16 = 9, +}; + +/* + * Enumeration of CSI clock modes. + */ +enum ipu_csi_clk_mode { + IPU_CSI_CLK_MODE_GATED_CLK, + IPU_CSI_CLK_MODE_NONGATED_CLK, + IPU_CSI_CLK_MODE_CCIR656_PROGRESSIVE, + IPU_CSI_CLK_MODE_CCIR656_INTERLACED, + IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_DDR, + IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_SDR, + IPU_CSI_CLK_MODE_CCIR1120_INTERLACED_DDR, + IPU_CSI_CLK_MODE_CCIR1120_INTERLACED_SDR, +}; + +static inline u32 ipu_csi_read(struct ipu_csi *csi, unsigned offset) +{ + return readl(csi->base + offset); +} + +static inline void ipu_csi_write(struct ipu_csi *csi, u32 value, + unsigned offset) +{ + writel(value, csi->base + offset); +} + +/* + * Set mclk division ratio for generating test mode mclk. Only used + * for test generator. + */ +static int ipu_csi_set_testgen_mclk(struct ipu_csi *csi, u32 pixel_clk, + u32 ipu_clk) +{ + u32 temp; + u32 div_ratio; + + div_ratio = (ipu_clk / pixel_clk) - 1; + + if (div_ratio > 0xFF || div_ratio < 0) { + dev_err(csi->ipu->dev, + "value of pixel_clk extends normal range\n"); + return -EINVAL; + } + + temp = ipu_csi_read(csi, CSI_SENS_CONF); + temp &= ~CSI_SENS_CONF_DIVRATIO_MASK; + ipu_csi_write(csi, temp | (div_ratio << CSI_SENS_CONF_DIVRATIO_SHIFT), + CSI_SENS_CONF); + + return 0; +} + +/* + * Find the CSI data format and data width for the given V4L2 media + * bus pixel format code. 
+ */ +static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code) +{ + switch (mbus_code) { + case V4L2_MBUS_FMT_BGR565_2X8_BE: + case V4L2_MBUS_FMT_BGR565_2X8_LE: + case V4L2_MBUS_FMT_RGB565_2X8_BE: + case V4L2_MBUS_FMT_RGB565_2X8_LE: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_RGB565; + cfg->mipi_dt = MIPI_DT_RGB565; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_RGB444_2X8_PADHI_BE: + case V4L2_MBUS_FMT_RGB444_2X8_PADHI_LE: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_RGB444; + cfg->mipi_dt = MIPI_DT_RGB444; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_RGB555_2X8_PADHI_BE: + case V4L2_MBUS_FMT_RGB555_2X8_PADHI_LE: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_RGB555; + cfg->mipi_dt = MIPI_DT_RGB555; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_UYVY8_2X8: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_YUV422_UYVY; + cfg->mipi_dt = MIPI_DT_YUV422; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_YUYV8_2X8: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_YUV422_YUYV; + cfg->mipi_dt = MIPI_DT_YUV422; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_UYVY8_1X16: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_YUV422_UYVY; + cfg->mipi_dt = MIPI_DT_YUV422; + cfg->data_width = IPU_CSI_DATA_WIDTH_16; + break; + case V4L2_MBUS_FMT_YUYV8_1X16: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_YUV422_YUYV; + cfg->mipi_dt = MIPI_DT_YUV422; + cfg->data_width = IPU_CSI_DATA_WIDTH_16; + break; + case V4L2_MBUS_FMT_SBGGR8_1X8: + case V4L2_MBUS_FMT_SGBRG8_1X8: + case V4L2_MBUS_FMT_SGRBG8_1X8: + case V4L2_MBUS_FMT_SRGGB8_1X8: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; + cfg->mipi_dt = MIPI_DT_RAW8; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8: + case V4L2_MBUS_FMT_SGBRG10_DPCM8_1X8: + case V4L2_MBUS_FMT_SGRBG10_DPCM8_1X8: + case V4L2_MBUS_FMT_SRGGB10_DPCM8_1X8: + case V4L2_MBUS_FMT_SBGGR10_2X8_PADHI_BE: + case 
V4L2_MBUS_FMT_SBGGR10_2X8_PADHI_LE: + case V4L2_MBUS_FMT_SBGGR10_2X8_PADLO_BE: + case V4L2_MBUS_FMT_SBGGR10_2X8_PADLO_LE: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; + cfg->mipi_dt = MIPI_DT_RAW10; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_SBGGR10_1X10: + case V4L2_MBUS_FMT_SGBRG10_1X10: + case V4L2_MBUS_FMT_SGRBG10_1X10: + case V4L2_MBUS_FMT_SRGGB10_1X10: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; + cfg->mipi_dt = MIPI_DT_RAW10; + cfg->data_width = IPU_CSI_DATA_WIDTH_10; + break; + case V4L2_MBUS_FMT_SBGGR12_1X12: + case V4L2_MBUS_FMT_SGBRG12_1X12: + case V4L2_MBUS_FMT_SGRBG12_1X12: + case V4L2_MBUS_FMT_SRGGB12_1X12: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; + cfg->mipi_dt = MIPI_DT_RAW12; + cfg->data_width = IPU_CSI_DATA_WIDTH_12; + break; + case V4L2_MBUS_FMT_JPEG_1X8: + /* TODO */ + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_JPEG; + cfg->mipi_dt = MIPI_DT_RAW8; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* + * Fill a CSI bus config struct from mbus_config and mbus_framefmt. + */ +static void fill_csi_bus_cfg(struct ipu_csi_bus_config *csicfg, + struct v4l2_mbus_config *mbus_cfg, + struct v4l2_mbus_framefmt *mbus_fmt) +{ + memset(csicfg, 0, sizeof(*csicfg)); + + mbus_code_to_bus_cfg(csicfg, mbus_fmt->code); + + switch (mbus_cfg->type) { + case V4L2_MBUS_PARALLEL: + csicfg->ext_vsync = 1; + csicfg->vsync_pol = (mbus_cfg->flags & + V4L2_MBUS_VSYNC_ACTIVE_LOW) ? 1 : 0; + csicfg->hsync_pol = (mbus_cfg->flags & + V4L2_MBUS_HSYNC_ACTIVE_LOW) ? 1 : 0; + csicfg->pixclk_pol = (mbus_cfg->flags & + V4L2_MBUS_PCLK_SAMPLE_FALLING) ? 
1 : 0; + csicfg->clk_mode = IPU_CSI_CLK_MODE_GATED_CLK; + break; + case V4L2_MBUS_BT656: + csicfg->ext_vsync = 0; + if (V4L2_FIELD_HAS_BOTH(mbus_fmt->field)) + csicfg->clk_mode = IPU_CSI_CLK_MODE_CCIR656_INTERLACED; + else + csicfg->clk_mode = IPU_CSI_CLK_MODE_CCIR656_PROGRESSIVE; + break; + case V4L2_MBUS_CSI2: + /* + * MIPI CSI-2 requires non gated clock mode, all other + * parameters are not applicable for MIPI CSI-2 bus. + */ + csicfg->clk_mode = IPU_CSI_CLK_MODE_NONGATED_CLK; + break; + default: + /* will never get here, keep compiler quiet */ + break; + } +} + +int ipu_csi_init_interface(struct ipu_csi *csi, + struct v4l2_mbus_config *mbus_cfg, + struct v4l2_mbus_framefmt *mbus_fmt) +{ + struct ipu_csi_bus_config cfg; + unsigned long flags; + u32 data = 0; + + fill_csi_bus_cfg(&cfg, mbus_cfg, mbus_fmt); + + /* Set the CSI_SENS_CONF register remaining fields */ + data |= cfg.data_width << CSI_SENS_CONF_DATA_WIDTH_SHIFT | + cfg.data_fmt << CSI_SENS_CONF_DATA_FMT_SHIFT | + cfg.data_pol << CSI_SENS_CONF_DATA_POL_SHIFT | + cfg.vsync_pol << CSI_SENS_CONF_VSYNC_POL_SHIFT | + cfg.hsync_pol << CSI_SENS_CONF_HSYNC_POL_SHIFT | + cfg.pixclk_pol << CSI_SENS_CONF_PIX_CLK_POL_SHIFT | + cfg.ext_vsync << CSI_SENS_CONF_EXT_VSYNC_SHIFT | + cfg.clk_mode << CSI_SENS_CONF_SENS_PRTCL_SHIFT | + cfg.pack_tight << CSI_SENS_CONF_PACK_TIGHT_SHIFT | + cfg.force_eof << CSI_SENS_CONF_FORCE_EOF_SHIFT | + cfg.data_en_pol << CSI_SENS_CONF_DATA_EN_POL_SHIFT; + + spin_lock_irqsave(&csi->lock, flags); + + ipu_csi_write(csi, data, CSI_SENS_CONF); + + /* Setup sensor frame size */ + ipu_csi_write(csi, + (mbus_fmt->width - 1) | ((mbus_fmt->height - 1) << 16), + CSI_SENS_FRM_SIZE); + + /* Set CCIR registers */ + + switch (cfg.clk_mode) { + case IPU_CSI_CLK_MODE_CCIR656_PROGRESSIVE: + ipu_csi_write(csi, 0x40030, CSI_CCIR_CODE_1); + ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3); + break; + case IPU_CSI_CLK_MODE_CCIR656_INTERLACED: + if (mbus_fmt->width == 720 && mbus_fmt->height == 576) { + /* + * PAL 
case + * + * Field0BlankEnd = 0x6, Field0BlankStart = 0x2, + * Field0ActiveEnd = 0x4, Field0ActiveStart = 0 + * Field1BlankEnd = 0x7, Field1BlankStart = 0x3, + * Field1ActiveEnd = 0x5, Field1ActiveStart = 0x1 + */ + ipu_csi_write(csi, 0x40596 | CSI_CCIR_ERR_DET_EN, + CSI_CCIR_CODE_1); + ipu_csi_write(csi, 0xD07DF, CSI_CCIR_CODE_2); + ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3); + + } else if (mbus_fmt->width == 720 && mbus_fmt->height == 480) { + /* + * NTSC case + * + * Field0BlankEnd = 0x7, Field0BlankStart = 0x3, + * Field0ActiveEnd = 0x5, Field0ActiveStart = 0x1 + * Field1BlankEnd = 0x6, Field1BlankStart = 0x2, + * Field1ActiveEnd = 0x4, Field1ActiveStart = 0 + */ + ipu_csi_write(csi, 0xD07DF | CSI_CCIR_ERR_DET_EN, + CSI_CCIR_CODE_1); + ipu_csi_write(csi, 0x40596, CSI_CCIR_CODE_2); + ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3); + } else { + dev_err(csi->ipu->dev, + "Unsupported CCIR656 interlaced video mode\n"); + spin_unlock_irqrestore(&csi->lock, flags); + return -EINVAL; + } + break; + case IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_DDR: + case IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_SDR: + case IPU_CSI_CLK_MODE_CCIR1120_INTERLACED_DDR: + case IPU_CSI_CLK_MODE_CCIR1120_INTERLACED_SDR: + ipu_csi_write(csi, 0x40030 | CSI_CCIR_ERR_DET_EN, + CSI_CCIR_CODE_1); + ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3); + break; + case IPU_CSI_CLK_MODE_GATED_CLK: + case IPU_CSI_CLK_MODE_NONGATED_CLK: + ipu_csi_write(csi, 0, CSI_CCIR_CODE_1); + break; + } + + dev_dbg(csi->ipu->dev, "CSI_SENS_CONF = 0x%08X\n", + ipu_csi_read(csi, CSI_SENS_CONF)); + dev_dbg(csi->ipu->dev, "CSI_ACT_FRM_SIZE = 0x%08X\n", + ipu_csi_read(csi, CSI_ACT_FRM_SIZE)); + + spin_unlock_irqrestore(&csi->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_csi_init_interface); + +bool ipu_csi_is_interlaced(struct ipu_csi *csi) +{ + unsigned long flags; + u32 sensor_protocol; + + spin_lock_irqsave(&csi->lock, flags); + sensor_protocol = + (ipu_csi_read(csi, CSI_SENS_CONF) & + CSI_SENS_CONF_SENS_PRTCL_MASK) >> + 
CSI_SENS_CONF_SENS_PRTCL_SHIFT; + spin_unlock_irqrestore(&csi->lock, flags); + + switch (sensor_protocol) { + case IPU_CSI_CLK_MODE_GATED_CLK: + case IPU_CSI_CLK_MODE_NONGATED_CLK: + case IPU_CSI_CLK_MODE_CCIR656_PROGRESSIVE: + case IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_DDR: + case IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_SDR: + return false; + case IPU_CSI_CLK_MODE_CCIR656_INTERLACED: + case IPU_CSI_CLK_MODE_CCIR1120_INTERLACED_DDR: + case IPU_CSI_CLK_MODE_CCIR1120_INTERLACED_SDR: + return true; + default: + dev_err(csi->ipu->dev, + "CSI %d sensor protocol unsupported\n", csi->id); + return false; + } +} +EXPORT_SYMBOL_GPL(ipu_csi_is_interlaced); + +void ipu_csi_get_window(struct ipu_csi *csi, struct v4l2_rect *w) +{ + unsigned long flags; + u32 reg; + + spin_lock_irqsave(&csi->lock, flags); + + reg = ipu_csi_read(csi, CSI_ACT_FRM_SIZE); + w->width = (reg & 0xFFFF) + 1; + w->height = (reg >> 16 & 0xFFFF) + 1; + + reg = ipu_csi_read(csi, CSI_OUT_FRM_CTRL); + w->left = (reg & CSI_HSC_MASK) >> CSI_HSC_SHIFT; + w->top = (reg & CSI_VSC_MASK) >> CSI_VSC_SHIFT; + + spin_unlock_irqrestore(&csi->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_csi_get_window); + +void ipu_csi_set_window(struct ipu_csi *csi, struct v4l2_rect *w) +{ + unsigned long flags; + u32 reg; + + spin_lock_irqsave(&csi->lock, flags); + + ipu_csi_write(csi, (w->width - 1) | ((w->height - 1) << 16), + CSI_ACT_FRM_SIZE); + + reg = ipu_csi_read(csi, CSI_OUT_FRM_CTRL); + reg &= ~(CSI_HSC_MASK | CSI_VSC_MASK); + reg |= ((w->top << CSI_VSC_SHIFT) | (w->left << CSI_HSC_SHIFT)); + ipu_csi_write(csi, reg, CSI_OUT_FRM_CTRL); + + spin_unlock_irqrestore(&csi->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_csi_set_window); + +void ipu_csi_set_test_generator(struct ipu_csi *csi, bool active, + u32 r_value, u32 g_value, u32 b_value, + u32 pix_clk) +{ + unsigned long flags; + u32 ipu_clk = clk_get_rate(csi->clk_ipu); + u32 temp; + + spin_lock_irqsave(&csi->lock, flags); + + temp = ipu_csi_read(csi, CSI_TST_CTRL); + + if (active == 
false) { + temp &= ~CSI_TEST_GEN_MODE_EN; + ipu_csi_write(csi, temp, CSI_TST_CTRL); + } else { + /* Set sensb_mclk div_ratio */ + ipu_csi_set_testgen_mclk(csi, pix_clk, ipu_clk); + + temp &= ~(CSI_TEST_GEN_R_MASK | CSI_TEST_GEN_G_MASK | + CSI_TEST_GEN_B_MASK); + temp |= CSI_TEST_GEN_MODE_EN; + temp |= (r_value << CSI_TEST_GEN_R_SHIFT) | + (g_value << CSI_TEST_GEN_G_SHIFT) | + (b_value << CSI_TEST_GEN_B_SHIFT); + ipu_csi_write(csi, temp, CSI_TST_CTRL); + } + + spin_unlock_irqrestore(&csi->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_csi_set_test_generator); + +int ipu_csi_set_mipi_datatype(struct ipu_csi *csi, u32 vc, + struct v4l2_mbus_framefmt *mbus_fmt) +{ + struct ipu_csi_bus_config cfg; + unsigned long flags; + u32 temp; + + if (vc > 3) + return -EINVAL; + + mbus_code_to_bus_cfg(&cfg, mbus_fmt->code); + + spin_lock_irqsave(&csi->lock, flags); + + temp = ipu_csi_read(csi, CSI_MIPI_DI); + temp &= ~(0xff << (vc * 8)); + temp |= (cfg.mipi_dt << (vc * 8)); + ipu_csi_write(csi, temp, CSI_MIPI_DI); + + spin_unlock_irqrestore(&csi->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_csi_set_mipi_datatype); + +int ipu_csi_set_skip_smfc(struct ipu_csi *csi, u32 skip, + u32 max_ratio, u32 id) +{ + unsigned long flags; + u32 temp; + + if (max_ratio > 5 || id > 3) + return -EINVAL; + + spin_lock_irqsave(&csi->lock, flags); + + temp = ipu_csi_read(csi, CSI_SKIP); + temp &= ~(CSI_MAX_RATIO_SKIP_SMFC_MASK | CSI_ID_2_SKIP_MASK | + CSI_SKIP_SMFC_MASK); + temp |= (max_ratio << CSI_MAX_RATIO_SKIP_SMFC_SHIFT) | + (id << CSI_ID_2_SKIP_SHIFT) | + (skip << CSI_SKIP_SMFC_SHIFT); + ipu_csi_write(csi, temp, CSI_SKIP); + + spin_unlock_irqrestore(&csi->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_csi_set_skip_smfc); + +int ipu_csi_set_dest(struct ipu_csi *csi, enum ipu_csi_dest csi_dest) +{ + unsigned long flags; + u32 csi_sens_conf, dest; + + if (csi_dest == IPU_CSI_DEST_IDMAC) + dest = CSI_DATA_DEST_IDMAC; + else + dest = CSI_DATA_DEST_IC; /* IC or VDIC */ + + 
spin_lock_irqsave(&csi->lock, flags); + + csi_sens_conf = ipu_csi_read(csi, CSI_SENS_CONF); + csi_sens_conf &= ~CSI_SENS_CONF_DATA_DEST_MASK; + csi_sens_conf |= (dest << CSI_SENS_CONF_DATA_DEST_SHIFT); + ipu_csi_write(csi, csi_sens_conf, CSI_SENS_CONF); + + spin_unlock_irqrestore(&csi->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_csi_set_dest); + +int ipu_csi_enable(struct ipu_csi *csi) +{ + ipu_module_enable(csi->ipu, csi->module); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_csi_enable); + +int ipu_csi_disable(struct ipu_csi *csi) +{ + ipu_module_disable(csi->ipu, csi->module); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_csi_disable); + +struct ipu_csi *ipu_csi_get(struct ipu_soc *ipu, int id) +{ + unsigned long flags; + struct ipu_csi *csi, *ret; + + if (id > 1) + return ERR_PTR(-EINVAL); + + csi = ipu->csi_priv[id]; + ret = csi; + + spin_lock_irqsave(&csi->lock, flags); + + if (csi->inuse) { + ret = ERR_PTR(-EBUSY); + goto unlock; + } + + csi->inuse = true; +unlock: + spin_unlock_irqrestore(&csi->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(ipu_csi_get); + +void ipu_csi_put(struct ipu_csi *csi) +{ + unsigned long flags; + + spin_lock_irqsave(&csi->lock, flags); + csi->inuse = false; + spin_unlock_irqrestore(&csi->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_csi_put); + +int ipu_csi_init(struct ipu_soc *ipu, struct device *dev, int id, + unsigned long base, u32 module, struct clk *clk_ipu) +{ + struct ipu_csi *csi; + + if (id > 1) + return -ENODEV; + + csi = devm_kzalloc(dev, sizeof(*csi), GFP_KERNEL); + if (!csi) + return -ENOMEM; + + ipu->csi_priv[id] = csi; + + spin_lock_init(&csi->lock); + csi->module = module; + csi->id = id; + csi->clk_ipu = clk_ipu; + csi->base = devm_ioremap(dev, base, PAGE_SIZE); + if (!csi->base) + return -ENOMEM; + + dev_dbg(dev, "CSI%d base: 0x%08lx remapped to %p\n", + id, base, csi->base); + csi->ipu = ipu; + + return 0; +} + +void ipu_csi_exit(struct ipu_soc *ipu, int id) +{ +} + +void ipu_csi_dump(struct ipu_csi *csi) +{ + 
dev_dbg(csi->ipu->dev, "CSI_SENS_CONF: %08x\n", + ipu_csi_read(csi, CSI_SENS_CONF)); + dev_dbg(csi->ipu->dev, "CSI_SENS_FRM_SIZE: %08x\n", + ipu_csi_read(csi, CSI_SENS_FRM_SIZE)); + dev_dbg(csi->ipu->dev, "CSI_ACT_FRM_SIZE: %08x\n", + ipu_csi_read(csi, CSI_ACT_FRM_SIZE)); + dev_dbg(csi->ipu->dev, "CSI_OUT_FRM_CTRL: %08x\n", + ipu_csi_read(csi, CSI_OUT_FRM_CTRL)); + dev_dbg(csi->ipu->dev, "CSI_TST_CTRL: %08x\n", + ipu_csi_read(csi, CSI_TST_CTRL)); + dev_dbg(csi->ipu->dev, "CSI_CCIR_CODE_1: %08x\n", + ipu_csi_read(csi, CSI_CCIR_CODE_1)); + dev_dbg(csi->ipu->dev, "CSI_CCIR_CODE_2: %08x\n", + ipu_csi_read(csi, CSI_CCIR_CODE_2)); + dev_dbg(csi->ipu->dev, "CSI_CCIR_CODE_3: %08x\n", + ipu_csi_read(csi, CSI_CCIR_CODE_3)); + dev_dbg(csi->ipu->dev, "CSI_MIPI_DI: %08x\n", + ipu_csi_read(csi, CSI_MIPI_DI)); + dev_dbg(csi->ipu->dev, "CSI_SKIP: %08x\n", + ipu_csi_read(csi, CSI_SKIP)); +} +EXPORT_SYMBOL_GPL(ipu_csi_dump); diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h index 1a5c55c05fe8..9b274f1259e1 100644 --- a/drivers/gpu/ipu-v3/ipu-prv.h +++ b/drivers/gpu/ipu-v3/ipu-prv.h @@ -157,6 +157,7 @@ struct ipuv3_channel { }; struct ipu_cpmem; +struct ipu_csi; struct ipu_dc_priv; struct ipu_dmfc_priv; struct ipu_di; @@ -189,6 +190,7 @@ struct ipu_soc { struct ipu_dp_priv *dp_priv; struct ipu_dmfc_priv *dmfc_priv; struct ipu_di *di_priv[2]; + struct ipu_csi *csi_priv[2]; struct ipu_smfc_priv *smfc_priv; }; @@ -211,6 +213,10 @@ int ipu_module_disable(struct ipu_soc *ipu, u32 mask); bool ipu_idmac_channel_busy(struct ipu_soc *ipu, unsigned int chno); int ipu_wait_interrupt(struct ipu_soc *ipu, int irq, int ms); +int ipu_csi_init(struct ipu_soc *ipu, struct device *dev, int id, + unsigned long base, u32 module, struct clk *clk_ipu); +void ipu_csi_exit(struct ipu_soc *ipu, int id); + int ipu_di_init(struct ipu_soc *ipu, struct device *dev, int id, unsigned long base, u32 module, struct clk *ipu_clk); void ipu_di_exit(struct ipu_soc *ipu, int id); diff --git 
a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index f80fe13b0d4d..6d254275192b 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -16,6 +16,7 @@ #include #include #include +#include struct ipu_soc; @@ -61,6 +62,15 @@ struct ipu_di_signal_cfg { u8 vsync_pin; }; +/* + * Enumeration of CSI destinations + */ +enum ipu_csi_dest { + IPU_CSI_DEST_IDMAC, /* to memory via SMFC */ + IPU_CSI_DEST_IC, /* to Image Converter */ + IPU_CSI_DEST_VDIC, /* to VDIC */ +}; + enum ipu_color_space { IPUV3_COLORSPACE_RGB, IPUV3_COLORSPACE_YUV, @@ -211,8 +221,26 @@ int ipu_dp_set_global_alpha(struct ipu_dp *dp, bool enable, u8 alpha, /* * IPU CMOS Sensor Interface (csi) functions */ -int ipu_csi_enable(struct ipu_soc *ipu, int csi); -int ipu_csi_disable(struct ipu_soc *ipu, int csi); +struct ipu_csi; +int ipu_csi_init_interface(struct ipu_csi *csi, + struct v4l2_mbus_config *mbus_cfg, + struct v4l2_mbus_framefmt *mbus_fmt); +bool ipu_csi_is_interlaced(struct ipu_csi *csi); +void ipu_csi_get_window(struct ipu_csi *csi, struct v4l2_rect *w); +void ipu_csi_set_window(struct ipu_csi *csi, struct v4l2_rect *w); +void ipu_csi_set_test_generator(struct ipu_csi *csi, bool active, + u32 r_value, u32 g_value, u32 b_value, + u32 pix_clk); +int ipu_csi_set_mipi_datatype(struct ipu_csi *csi, u32 vc, + struct v4l2_mbus_framefmt *mbus_fmt); +int ipu_csi_set_skip_smfc(struct ipu_csi *csi, u32 skip, + u32 max_ratio, u32 id); +int ipu_csi_set_dest(struct ipu_csi *csi, enum ipu_csi_dest csi_dest); +int ipu_csi_enable(struct ipu_csi *csi); +int ipu_csi_disable(struct ipu_csi *csi); +struct ipu_csi *ipu_csi_get(struct ipu_soc *ipu, int id); +void ipu_csi_put(struct ipu_csi *csi); +void ipu_csi_dump(struct ipu_csi *csi); /* * IPU Sensor Multiple FIFO Controller (SMFC) functions -- GitLab From 1aa8ea0d2bd5d4ba7b5d2b132a02157bc1fb9793 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Mon, 11 Aug 2014 13:04:50 +0200 Subject: [PATCH 0323/1868] gpu: ipu-v3: Add Image 
Converter unit Adds the Image Converter (IC) unit. Signed-off-by: Steve Longerbeam Condensed the three CSC setup functions into a single one that uses static tables to set up the CSC task parameters. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/Makefile | 2 +- drivers/gpu/ipu-v3/ipu-common.c | 19 +- drivers/gpu/ipu-v3/ipu-ic.c | 778 ++++++++++++++++++++++++++++++++ drivers/gpu/ipu-v3/ipu-prv.h | 6 + include/video/imx-ipu-v3.h | 45 ++ 5 files changed, 848 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/ipu-v3/ipu-ic.c diff --git a/drivers/gpu/ipu-v3/Makefile b/drivers/gpu/ipu-v3/Makefile index d22bd06caa6d..107ec236a4a6 100644 --- a/drivers/gpu/ipu-v3/Makefile +++ b/drivers/gpu/ipu-v3/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_IMX_IPUV3_CORE) += imx-ipu-v3.o imx-ipu-v3-objs := ipu-common.o ipu-cpmem.o ipu-csi.o ipu-dc.o ipu-di.o \ - ipu-dp.o ipu-dmfc.o ipu-smfc.o + ipu-dp.o ipu-dmfc.o ipu-ic.o ipu-smfc.o diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 511c364231a2..312eef6ffcad 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -429,6 +429,7 @@ struct ipu_devtype { unsigned long tpm_ofs; unsigned long csi0_ofs; unsigned long csi1_ofs; + unsigned long ic_ofs; unsigned long disp0_ofs; unsigned long disp1_ofs; unsigned long dc_tmpl_ofs; @@ -444,6 +445,7 @@ static struct ipu_devtype ipu_type_imx51 = { .tpm_ofs = 0x1f060000, .csi0_ofs = 0x1f030000, .csi1_ofs = 0x1f038000, + .ic_ofs = 0x1f020000, .disp0_ofs = 0x1e040000, .disp1_ofs = 0x1e048000, .dc_tmpl_ofs = 0x1f080000, @@ -459,6 +461,7 @@ static struct ipu_devtype ipu_type_imx53 = { .tpm_ofs = 0x07060000, .csi0_ofs = 0x07030000, .csi1_ofs = 0x07038000, + .ic_ofs = 0x07020000, .disp0_ofs = 0x06040000, .disp1_ofs = 0x06048000, .dc_tmpl_ofs = 0x07080000, @@ -474,6 +477,7 @@ static struct ipu_devtype ipu_type_imx6q = { .tpm_ofs = 0x00360000, .csi0_ofs = 0x00230000, .csi1_ofs = 0x00238000, + .ic_ofs = 0x00220000, .disp0_ofs = 0x00240000, 
.disp1_ofs = 0x00248000, .dc_tmpl_ofs = 0x00380000, @@ -518,8 +522,16 @@ static int ipu_submodules_init(struct ipu_soc *ipu, goto err_csi_1; } + ret = ipu_ic_init(ipu, dev, + ipu_base + devtype->ic_ofs, + ipu_base + devtype->tpm_ofs); + if (ret) { + unit = "ic"; + goto err_ic; + } + ret = ipu_di_init(ipu, dev, 0, ipu_base + devtype->disp0_ofs, - IPU_CONF_DI0_EN, ipu_clk); + IPU_CONF_DI0_EN, ipu_clk); if (ret) { unit = "di0"; goto err_di_0; @@ -572,6 +584,8 @@ static int ipu_submodules_init(struct ipu_soc *ipu, err_di_1: ipu_di_exit(ipu, 0); err_di_0: + ipu_ic_exit(ipu); +err_ic: ipu_csi_exit(ipu, 1); err_csi_1: ipu_csi_exit(ipu, 0); @@ -654,6 +668,7 @@ static void ipu_submodules_exit(struct ipu_soc *ipu) ipu_dc_exit(ipu); ipu_di_exit(ipu, 1); ipu_di_exit(ipu, 0); + ipu_ic_exit(ipu); ipu_csi_exit(ipu, 1); ipu_csi_exit(ipu, 0); ipu_cpmem_exit(ipu); @@ -879,6 +894,8 @@ static int ipu_probe(struct platform_device *pdev) ipu_base + devtype->csi0_ofs); dev_dbg(&pdev->dev, "csi1: 0x%08lx\n", ipu_base + devtype->csi1_ofs); + dev_dbg(&pdev->dev, "ic: 0x%08lx\n", + ipu_base + devtype->ic_ofs); dev_dbg(&pdev->dev, "disp0: 0x%08lx\n", ipu_base + devtype->disp0_ofs); dev_dbg(&pdev->dev, "disp1: 0x%08lx\n", diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c new file mode 100644 index 000000000000..ad75588e1629 --- /dev/null +++ b/drivers/gpu/ipu-v3/ipu-ic.c @@ -0,0 +1,778 @@ +/* + * Copyright (C) 2012-2014 Mentor Graphics Inc. + * Copyright 2005-2012 Freescale Semiconductor, Inc. All Rights Reserved. + * + * The code contained herein is licensed under the GNU General Public + * License. 
You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include +#include +#include +#include +#include +#include +#include "ipu-prv.h" + +/* IC Register Offsets */ +#define IC_CONF 0x0000 +#define IC_PRP_ENC_RSC 0x0004 +#define IC_PRP_VF_RSC 0x0008 +#define IC_PP_RSC 0x000C +#define IC_CMBP_1 0x0010 +#define IC_CMBP_2 0x0014 +#define IC_IDMAC_1 0x0018 +#define IC_IDMAC_2 0x001C +#define IC_IDMAC_3 0x0020 +#define IC_IDMAC_4 0x0024 + +/* IC Register Fields */ +#define IC_CONF_PRPENC_EN (1 << 0) +#define IC_CONF_PRPENC_CSC1 (1 << 1) +#define IC_CONF_PRPENC_ROT_EN (1 << 2) +#define IC_CONF_PRPVF_EN (1 << 8) +#define IC_CONF_PRPVF_CSC1 (1 << 9) +#define IC_CONF_PRPVF_CSC2 (1 << 10) +#define IC_CONF_PRPVF_CMB (1 << 11) +#define IC_CONF_PRPVF_ROT_EN (1 << 12) +#define IC_CONF_PP_EN (1 << 16) +#define IC_CONF_PP_CSC1 (1 << 17) +#define IC_CONF_PP_CSC2 (1 << 18) +#define IC_CONF_PP_CMB (1 << 19) +#define IC_CONF_PP_ROT_EN (1 << 20) +#define IC_CONF_IC_GLB_LOC_A (1 << 28) +#define IC_CONF_KEY_COLOR_EN (1 << 29) +#define IC_CONF_RWS_EN (1 << 30) +#define IC_CONF_CSI_MEM_WR_EN (1 << 31) + +#define IC_IDMAC_1_CB0_BURST_16 (1 << 0) +#define IC_IDMAC_1_CB1_BURST_16 (1 << 1) +#define IC_IDMAC_1_CB2_BURST_16 (1 << 2) +#define IC_IDMAC_1_CB3_BURST_16 (1 << 3) +#define IC_IDMAC_1_CB4_BURST_16 (1 << 4) +#define IC_IDMAC_1_CB5_BURST_16 (1 << 5) +#define IC_IDMAC_1_CB6_BURST_16 (1 << 6) +#define IC_IDMAC_1_CB7_BURST_16 (1 << 7) +#define IC_IDMAC_1_PRPENC_ROT_MASK (0x7 << 11) +#define IC_IDMAC_1_PRPENC_ROT_OFFSET 11 +#define IC_IDMAC_1_PRPVF_ROT_MASK (0x7 << 14) +#define IC_IDMAC_1_PRPVF_ROT_OFFSET 14 +#define IC_IDMAC_1_PP_ROT_MASK (0x7 << 17) +#define IC_IDMAC_1_PP_ROT_OFFSET 17 +#define IC_IDMAC_1_PP_FLIP_RS (1 << 22) +#define IC_IDMAC_1_PRPVF_FLIP_RS (1 << 21) +#define IC_IDMAC_1_PRPENC_FLIP_RS (1 << 20) + 
+#define IC_IDMAC_2_PRPENC_HEIGHT_MASK (0x3ff << 0) +#define IC_IDMAC_2_PRPENC_HEIGHT_OFFSET 0 +#define IC_IDMAC_2_PRPVF_HEIGHT_MASK (0x3ff << 10) +#define IC_IDMAC_2_PRPVF_HEIGHT_OFFSET 10 +#define IC_IDMAC_2_PP_HEIGHT_MASK (0x3ff << 20) +#define IC_IDMAC_2_PP_HEIGHT_OFFSET 20 + +#define IC_IDMAC_3_PRPENC_WIDTH_MASK (0x3ff << 0) +#define IC_IDMAC_3_PRPENC_WIDTH_OFFSET 0 +#define IC_IDMAC_3_PRPVF_WIDTH_MASK (0x3ff << 10) +#define IC_IDMAC_3_PRPVF_WIDTH_OFFSET 10 +#define IC_IDMAC_3_PP_WIDTH_MASK (0x3ff << 20) +#define IC_IDMAC_3_PP_WIDTH_OFFSET 20 + +struct ic_task_regoffs { + u32 rsc; + u32 tpmem_csc[2]; +}; + +struct ic_task_bitfields { + u32 ic_conf_en; + u32 ic_conf_rot_en; + u32 ic_conf_cmb_en; + u32 ic_conf_csc1_en; + u32 ic_conf_csc2_en; + u32 ic_cmb_galpha_bit; +}; + +static const struct ic_task_regoffs ic_task_reg[IC_NUM_TASKS] = { + [IC_TASK_ENCODER] = { + .rsc = IC_PRP_ENC_RSC, + .tpmem_csc = {0x2008, 0}, + }, + [IC_TASK_VIEWFINDER] = { + .rsc = IC_PRP_VF_RSC, + .tpmem_csc = {0x4028, 0x4040}, + }, + [IC_TASK_POST_PROCESSOR] = { + .rsc = IC_PP_RSC, + .tpmem_csc = {0x6060, 0x6078}, + }, +}; + +static const struct ic_task_bitfields ic_task_bit[IC_NUM_TASKS] = { + [IC_TASK_ENCODER] = { + .ic_conf_en = IC_CONF_PRPENC_EN, + .ic_conf_rot_en = IC_CONF_PRPENC_ROT_EN, + .ic_conf_cmb_en = 0, /* NA */ + .ic_conf_csc1_en = IC_CONF_PRPENC_CSC1, + .ic_conf_csc2_en = 0, /* NA */ + .ic_cmb_galpha_bit = 0, /* NA */ + }, + [IC_TASK_VIEWFINDER] = { + .ic_conf_en = IC_CONF_PRPVF_EN, + .ic_conf_rot_en = IC_CONF_PRPVF_ROT_EN, + .ic_conf_cmb_en = IC_CONF_PRPVF_CMB, + .ic_conf_csc1_en = IC_CONF_PRPVF_CSC1, + .ic_conf_csc2_en = IC_CONF_PRPVF_CSC2, + .ic_cmb_galpha_bit = 0, + }, + [IC_TASK_POST_PROCESSOR] = { + .ic_conf_en = IC_CONF_PP_EN, + .ic_conf_rot_en = IC_CONF_PP_ROT_EN, + .ic_conf_cmb_en = IC_CONF_PP_CMB, + .ic_conf_csc1_en = IC_CONF_PP_CSC1, + .ic_conf_csc2_en = IC_CONF_PP_CSC2, + .ic_cmb_galpha_bit = 8, + }, +}; + +struct ipu_ic_priv; + +struct ipu_ic { + enum 
ipu_ic_task task; + const struct ic_task_regoffs *reg; + const struct ic_task_bitfields *bit; + + enum ipu_color_space in_cs, g_in_cs; + enum ipu_color_space out_cs; + bool graphics; + bool rotation; + bool in_use; + + struct ipu_ic_priv *priv; +}; + +struct ipu_ic_priv { + void __iomem *base; + void __iomem *tpmem_base; + spinlock_t lock; + struct ipu_soc *ipu; + int use_count; + struct ipu_ic task[IC_NUM_TASKS]; +}; + +static inline u32 ipu_ic_read(struct ipu_ic *ic, unsigned offset) +{ + return readl(ic->priv->base + offset); +} + +static inline void ipu_ic_write(struct ipu_ic *ic, u32 value, unsigned offset) +{ + writel(value, ic->priv->base + offset); +} + +struct ic_csc_params { + s16 coeff[3][3]; /* signed 9-bit integer coefficients */ + s16 offset[3]; /* signed 11+2-bit fixed point offset */ + u8 scale:2; /* scale coefficients * 2^(scale-1) */ + bool sat:1; /* saturate to (16, 235(Y) / 240(U, V)) */ +}; + +/* + * Y = R * .299 + G * .587 + B * .114; + * U = R * -.169 + G * -.332 + B * .500 + 128.; + * V = R * .500 + G * -.419 + B * -.0813 + 128.; + */ +static const struct ic_csc_params ic_csc_rgb2ycbcr = { + .coeff = { + { 77, 150, 29 }, + { 469, 427, 128 }, + { 128, 405, 491 }, + }, + .offset = { 0, 512, 512 }, + .scale = 1, +}; + +/* transparent RGB->RGB matrix for graphics combining */ +static const struct ic_csc_params ic_csc_rgb2rgb = { + .coeff = { + { 128, 0, 0 }, + { 0, 128, 0 }, + { 0, 0, 128 }, + }, + .scale = 2, +}; + +/* + * R = (1.164 * (Y - 16)) + (1.596 * (Cr - 128)); + * G = (1.164 * (Y - 16)) - (0.392 * (Cb - 128)) - (0.813 * (Cr - 128)); + * B = (1.164 * (Y - 16)) + (2.017 * (Cb - 128); + */ +static const struct ic_csc_params ic_csc_ycbcr2rgb = { + .coeff = { + { 149, 0, 204 }, + { 149, 462, 408 }, + { 149, 255, 0 }, + }, + .offset = { -446, 266, -554 }, + .scale = 2, +}; + +static int init_csc(struct ipu_ic *ic, + enum ipu_color_space inf, + enum ipu_color_space outf, + int csc_index) +{ + struct ipu_ic_priv *priv = ic->priv; + const 
struct ic_csc_params *params; + u32 __iomem *base; + const u16 (*c)[3]; + const u16 *a; + u32 param; + + base = (u32 __iomem *) + (priv->tpmem_base + ic->reg->tpmem_csc[csc_index]); + + if (inf == IPUV3_COLORSPACE_YUV && outf == IPUV3_COLORSPACE_RGB) + params = &ic_csc_ycbcr2rgb; + else if (inf == IPUV3_COLORSPACE_RGB && outf == IPUV3_COLORSPACE_YUV) + params = &ic_csc_rgb2ycbcr; + else if (inf == IPUV3_COLORSPACE_RGB && outf == IPUV3_COLORSPACE_RGB) + params = &ic_csc_rgb2rgb; + else { + dev_err(priv->ipu->dev, "Unsupported color space conversion\n"); + return -EINVAL; + } + + /* Cast to unsigned */ + c = (const u16 (*)[3])params->coeff; + a = (const u16 *)params->offset; + + param = ((a[0] & 0x1f) << 27) | ((c[0][0] & 0x1ff) << 18) | + ((c[1][1] & 0x1ff) << 9) | (c[2][2] & 0x1ff); + writel(param, base++); + + param = ((a[0] & 0x1fe0) >> 5) | (params->scale << 8) | + (params->sat << 9); + writel(param, base++); + + param = ((a[1] & 0x1f) << 27) | ((c[0][1] & 0x1ff) << 18) | + ((c[1][0] & 0x1ff) << 9) | (c[2][0] & 0x1ff); + writel(param, base++); + + param = ((a[1] & 0x1fe0) >> 5); + writel(param, base++); + + param = ((a[2] & 0x1f) << 27) | ((c[0][2] & 0x1ff) << 18) | + ((c[1][2] & 0x1ff) << 9) | (c[2][1] & 0x1ff); + writel(param, base++); + + param = ((a[2] & 0x1fe0) >> 5); + writel(param, base++); + + return 0; +} + +static int calc_resize_coeffs(struct ipu_ic *ic, + u32 in_size, u32 out_size, + u32 *resize_coeff, + u32 *downsize_coeff) +{ + struct ipu_ic_priv *priv = ic->priv; + struct ipu_soc *ipu = priv->ipu; + u32 temp_size, temp_downsize; + + /* + * Input size cannot be more than 4096, and output size cannot + * be more than 1024 + */ + if (in_size > 4096) { + dev_err(ipu->dev, "Unsupported resize (in_size > 4096)\n"); + return -EINVAL; + } + if (out_size > 1024) { + dev_err(ipu->dev, "Unsupported resize (out_size > 1024)\n"); + return -EINVAL; + } + + /* Cannot downsize more than 8:1 */ + if ((out_size << 3) < in_size) { + dev_err(ipu->dev, "Unsupported 
downsize\n"); + return -EINVAL; + } + + /* Compute downsizing coefficient */ + temp_downsize = 0; + temp_size = in_size; + while (((temp_size > 1024) || (temp_size >= out_size * 2)) && + (temp_downsize < 2)) { + temp_size >>= 1; + temp_downsize++; + } + *downsize_coeff = temp_downsize; + + /* + * compute resizing coefficient using the following equation: + * resize_coeff = M * (SI - 1) / (SO - 1) + * where M = 2^13, SI = input size, SO = output size + */ + *resize_coeff = (8192L * (temp_size - 1)) / (out_size - 1); + if (*resize_coeff >= 16384L) { + dev_err(ipu->dev, "Warning! Overflow on resize coeff.\n"); + *resize_coeff = 0x3FFF; + } + + return 0; +} + +void ipu_ic_task_enable(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + unsigned long flags; + u32 ic_conf; + + spin_lock_irqsave(&priv->lock, flags); + + ic_conf = ipu_ic_read(ic, IC_CONF); + + ic_conf |= ic->bit->ic_conf_en; + + if (ic->rotation) + ic_conf |= ic->bit->ic_conf_rot_en; + + if (ic->in_cs != ic->out_cs) + ic_conf |= ic->bit->ic_conf_csc1_en; + + if (ic->graphics) { + ic_conf |= ic->bit->ic_conf_cmb_en; + ic_conf |= ic->bit->ic_conf_csc1_en; + + if (ic->g_in_cs != ic->out_cs) + ic_conf |= ic->bit->ic_conf_csc2_en; + } + + ipu_ic_write(ic, ic_conf, IC_CONF); + + spin_unlock_irqrestore(&priv->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_ic_task_enable); + +void ipu_ic_task_disable(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + unsigned long flags; + u32 ic_conf; + + spin_lock_irqsave(&priv->lock, flags); + + ic_conf = ipu_ic_read(ic, IC_CONF); + + ic_conf &= ~(ic->bit->ic_conf_en | + ic->bit->ic_conf_csc1_en | + ic->bit->ic_conf_rot_en); + if (ic->bit->ic_conf_csc2_en) + ic_conf &= ~ic->bit->ic_conf_csc2_en; + if (ic->bit->ic_conf_cmb_en) + ic_conf &= ~ic->bit->ic_conf_cmb_en; + + ipu_ic_write(ic, ic_conf, IC_CONF); + + ic->rotation = ic->graphics = false; + + spin_unlock_irqrestore(&priv->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_ic_task_disable); + +int 
ipu_ic_task_graphics_init(struct ipu_ic *ic, + enum ipu_color_space in_g_cs, + bool galpha_en, u32 galpha, + bool colorkey_en, u32 colorkey) +{ + struct ipu_ic_priv *priv = ic->priv; + unsigned long flags; + u32 reg, ic_conf; + int ret = 0; + + if (ic->task == IC_TASK_ENCODER) + return -EINVAL; + + spin_lock_irqsave(&priv->lock, flags); + + ic_conf = ipu_ic_read(ic, IC_CONF); + + if (!(ic_conf & ic->bit->ic_conf_csc1_en)) { + /* need transparent CSC1 conversion */ + ret = init_csc(ic, IPUV3_COLORSPACE_RGB, + IPUV3_COLORSPACE_RGB, 0); + if (ret) + goto unlock; + } + + ic->g_in_cs = in_g_cs; + + if (ic->g_in_cs != ic->out_cs) { + ret = init_csc(ic, ic->g_in_cs, ic->out_cs, 1); + if (ret) + goto unlock; + } + + if (galpha_en) { + ic_conf |= IC_CONF_IC_GLB_LOC_A; + reg = ipu_ic_read(ic, IC_CMBP_1); + reg &= ~(0xff << ic->bit->ic_cmb_galpha_bit); + reg |= (galpha << ic->bit->ic_cmb_galpha_bit); + ipu_ic_write(ic, reg, IC_CMBP_1); + } else + ic_conf &= ~IC_CONF_IC_GLB_LOC_A; + + if (colorkey_en) { + ic_conf |= IC_CONF_KEY_COLOR_EN; + ipu_ic_write(ic, colorkey, IC_CMBP_2); + } else + ic_conf &= ~IC_CONF_KEY_COLOR_EN; + + ipu_ic_write(ic, ic_conf, IC_CONF); + + ic->graphics = true; +unlock: + spin_unlock_irqrestore(&priv->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(ipu_ic_task_graphics_init); + +int ipu_ic_task_init(struct ipu_ic *ic, + int in_width, int in_height, + int out_width, int out_height, + enum ipu_color_space in_cs, + enum ipu_color_space out_cs) +{ + struct ipu_ic_priv *priv = ic->priv; + u32 reg, downsize_coeff, resize_coeff; + unsigned long flags; + int ret = 0; + + /* Setup vertical resizing */ + ret = calc_resize_coeffs(ic, in_height, out_height, + &resize_coeff, &downsize_coeff); + if (ret) + return ret; + + reg = (downsize_coeff << 30) | (resize_coeff << 16); + + /* Setup horizontal resizing */ + ret = calc_resize_coeffs(ic, in_width, out_width, + &resize_coeff, &downsize_coeff); + if (ret) + return ret; + + reg |= (downsize_coeff << 14) | 
resize_coeff; + + spin_lock_irqsave(&priv->lock, flags); + + ipu_ic_write(ic, reg, ic->reg->rsc); + + /* Setup color space conversion */ + ic->in_cs = in_cs; + ic->out_cs = out_cs; + + if (ic->in_cs != ic->out_cs) { + ret = init_csc(ic, ic->in_cs, ic->out_cs, 0); + if (ret) + goto unlock; + } + +unlock: + spin_unlock_irqrestore(&priv->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(ipu_ic_task_init); + +int ipu_ic_task_idma_init(struct ipu_ic *ic, struct ipuv3_channel *channel, + u32 width, u32 height, int burst_size, + enum ipu_rotate_mode rot) +{ + struct ipu_ic_priv *priv = ic->priv; + struct ipu_soc *ipu = priv->ipu; + u32 ic_idmac_1, ic_idmac_2, ic_idmac_3; + u32 temp_rot = bitrev8(rot) >> 5; + bool need_hor_flip = false; + unsigned long flags; + int ret = 0; + + if ((burst_size != 8) && (burst_size != 16)) { + dev_err(ipu->dev, "Illegal burst length for IC\n"); + return -EINVAL; + } + + width--; + height--; + + if (temp_rot & 0x2) /* Need horizontal flip */ + need_hor_flip = true; + + spin_lock_irqsave(&priv->lock, flags); + + ic_idmac_1 = ipu_ic_read(ic, IC_IDMAC_1); + ic_idmac_2 = ipu_ic_read(ic, IC_IDMAC_2); + ic_idmac_3 = ipu_ic_read(ic, IC_IDMAC_3); + + switch (channel->num) { + case IPUV3_CHANNEL_IC_PP_MEM: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB2_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB2_BURST_16; + + if (need_hor_flip) + ic_idmac_1 |= IC_IDMAC_1_PP_FLIP_RS; + else + ic_idmac_1 &= ~IC_IDMAC_1_PP_FLIP_RS; + + ic_idmac_2 &= ~IC_IDMAC_2_PP_HEIGHT_MASK; + ic_idmac_2 |= height << IC_IDMAC_2_PP_HEIGHT_OFFSET; + + ic_idmac_3 &= ~IC_IDMAC_3_PP_WIDTH_MASK; + ic_idmac_3 |= width << IC_IDMAC_3_PP_WIDTH_OFFSET; + break; + case IPUV3_CHANNEL_MEM_IC_PP: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB5_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB5_BURST_16; + break; + case IPUV3_CHANNEL_MEM_ROT_PP: + ic_idmac_1 &= ~IC_IDMAC_1_PP_ROT_MASK; + ic_idmac_1 |= temp_rot << IC_IDMAC_1_PP_ROT_OFFSET; + break; + case IPUV3_CHANNEL_MEM_IC_PRP_VF: 
+ if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB6_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB6_BURST_16; + break; + case IPUV3_CHANNEL_IC_PRP_ENC_MEM: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB0_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB0_BURST_16; + + if (need_hor_flip) + ic_idmac_1 |= IC_IDMAC_1_PRPENC_FLIP_RS; + else + ic_idmac_1 &= ~IC_IDMAC_1_PRPENC_FLIP_RS; + + ic_idmac_2 &= ~IC_IDMAC_2_PRPENC_HEIGHT_MASK; + ic_idmac_2 |= height << IC_IDMAC_2_PRPENC_HEIGHT_OFFSET; + + ic_idmac_3 &= ~IC_IDMAC_3_PRPENC_WIDTH_MASK; + ic_idmac_3 |= width << IC_IDMAC_3_PRPENC_WIDTH_OFFSET; + break; + case IPUV3_CHANNEL_MEM_ROT_ENC: + ic_idmac_1 &= ~IC_IDMAC_1_PRPENC_ROT_MASK; + ic_idmac_1 |= temp_rot << IC_IDMAC_1_PRPENC_ROT_OFFSET; + break; + case IPUV3_CHANNEL_IC_PRP_VF_MEM: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB1_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB1_BURST_16; + + if (need_hor_flip) + ic_idmac_1 |= IC_IDMAC_1_PRPVF_FLIP_RS; + else + ic_idmac_1 &= ~IC_IDMAC_1_PRPVF_FLIP_RS; + + ic_idmac_2 &= ~IC_IDMAC_2_PRPVF_HEIGHT_MASK; + ic_idmac_2 |= height << IC_IDMAC_2_PRPVF_HEIGHT_OFFSET; + + ic_idmac_3 &= ~IC_IDMAC_3_PRPVF_WIDTH_MASK; + ic_idmac_3 |= width << IC_IDMAC_3_PRPVF_WIDTH_OFFSET; + break; + case IPUV3_CHANNEL_MEM_ROT_VF: + ic_idmac_1 &= ~IC_IDMAC_1_PRPVF_ROT_MASK; + ic_idmac_1 |= temp_rot << IC_IDMAC_1_PRPVF_ROT_OFFSET; + break; + case IPUV3_CHANNEL_G_MEM_IC_PRP_VF: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB3_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB3_BURST_16; + break; + case IPUV3_CHANNEL_G_MEM_IC_PP: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB4_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB4_BURST_16; + break; + case IPUV3_CHANNEL_VDI_MEM_IC_VF: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB7_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB7_BURST_16; + break; + default: + goto unlock; + } + + ipu_ic_write(ic, ic_idmac_1, IC_IDMAC_1); + ipu_ic_write(ic, ic_idmac_2, IC_IDMAC_2); + 
ipu_ic_write(ic, ic_idmac_3, IC_IDMAC_3); + + if (rot >= IPU_ROTATE_90_RIGHT) + ic->rotation = true; + +unlock: + spin_unlock_irqrestore(&priv->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(ipu_ic_task_idma_init); + +int ipu_ic_enable(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + unsigned long flags; + u32 module = IPU_CONF_IC_EN; + + spin_lock_irqsave(&priv->lock, flags); + + if (ic->rotation) + module |= IPU_CONF_ROT_EN; + + if (!priv->use_count) + ipu_module_enable(priv->ipu, module); + + priv->use_count++; + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_ic_enable); + +int ipu_ic_disable(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + unsigned long flags; + u32 module = IPU_CONF_IC_EN | IPU_CONF_ROT_EN; + + spin_lock_irqsave(&priv->lock, flags); + + priv->use_count--; + + if (!priv->use_count) + ipu_module_disable(priv->ipu, module); + + if (priv->use_count < 0) + priv->use_count = 0; + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_ic_disable); + +struct ipu_ic *ipu_ic_get(struct ipu_soc *ipu, enum ipu_ic_task task) +{ + struct ipu_ic_priv *priv = ipu->ic_priv; + unsigned long flags; + struct ipu_ic *ic, *ret; + + if (task >= IC_NUM_TASKS) + return ERR_PTR(-EINVAL); + + ic = &priv->task[task]; + + spin_lock_irqsave(&priv->lock, flags); + + if (ic->in_use) { + ret = ERR_PTR(-EBUSY); + goto unlock; + } + + ic->in_use = true; + ret = ic; + +unlock: + spin_unlock_irqrestore(&priv->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(ipu_ic_get); + +void ipu_ic_put(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + ic->in_use = false; + spin_unlock_irqrestore(&priv->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_ic_put); + +int ipu_ic_init(struct ipu_soc *ipu, struct device *dev, + unsigned long base, unsigned long tpmem_base) +{ + struct ipu_ic_priv *priv; + int i; + + priv = 
devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + ipu->ic_priv = priv; + + spin_lock_init(&priv->lock); + priv->base = devm_ioremap(dev, base, PAGE_SIZE); + if (!priv->base) + return -ENOMEM; + priv->tpmem_base = devm_ioremap(dev, tpmem_base, SZ_64K); + if (!priv->tpmem_base) + return -ENOMEM; + + dev_dbg(dev, "IC base: 0x%08lx remapped to %p\n", base, priv->base); + + priv->ipu = ipu; + + for (i = 0; i < IC_NUM_TASKS; i++) { + priv->task[i].task = i; + priv->task[i].priv = priv; + priv->task[i].reg = &ic_task_reg[i]; + priv->task[i].bit = &ic_task_bit[i]; + } + + return 0; +} + +void ipu_ic_exit(struct ipu_soc *ipu) +{ +} + +void ipu_ic_dump(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + struct ipu_soc *ipu = priv->ipu; + + dev_dbg(ipu->dev, "IC_CONF = \t0x%08X\n", + ipu_ic_read(ic, IC_CONF)); + dev_dbg(ipu->dev, "IC_PRP_ENC_RSC = \t0x%08X\n", + ipu_ic_read(ic, IC_PRP_ENC_RSC)); + dev_dbg(ipu->dev, "IC_PRP_VF_RSC = \t0x%08X\n", + ipu_ic_read(ic, IC_PRP_VF_RSC)); + dev_dbg(ipu->dev, "IC_PP_RSC = \t0x%08X\n", + ipu_ic_read(ic, IC_PP_RSC)); + dev_dbg(ipu->dev, "IC_CMBP_1 = \t0x%08X\n", + ipu_ic_read(ic, IC_CMBP_1)); + dev_dbg(ipu->dev, "IC_CMBP_2 = \t0x%08X\n", + ipu_ic_read(ic, IC_CMBP_2)); + dev_dbg(ipu->dev, "IC_IDMAC_1 = \t0x%08X\n", + ipu_ic_read(ic, IC_IDMAC_1)); + dev_dbg(ipu->dev, "IC_IDMAC_2 = \t0x%08X\n", + ipu_ic_read(ic, IC_IDMAC_2)); + dev_dbg(ipu->dev, "IC_IDMAC_3 = \t0x%08X\n", + ipu_ic_read(ic, IC_IDMAC_3)); + dev_dbg(ipu->dev, "IC_IDMAC_4 = \t0x%08X\n", + ipu_ic_read(ic, IC_IDMAC_4)); +} +EXPORT_SYMBOL_GPL(ipu_ic_dump); diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h index 9b274f1259e1..1596a4f52faf 100644 --- a/drivers/gpu/ipu-v3/ipu-prv.h +++ b/drivers/gpu/ipu-v3/ipu-prv.h @@ -161,6 +161,7 @@ struct ipu_csi; struct ipu_dc_priv; struct ipu_dmfc_priv; struct ipu_di; +struct ipu_ic_priv; struct ipu_smfc_priv; struct ipu_devtype; @@ -191,6 +192,7 @@ struct ipu_soc { struct 
ipu_dmfc_priv *dmfc_priv; struct ipu_di *di_priv[2]; struct ipu_csi *csi_priv[2]; + struct ipu_ic_priv *ic_priv; struct ipu_smfc_priv *smfc_priv; }; @@ -217,6 +219,10 @@ int ipu_csi_init(struct ipu_soc *ipu, struct device *dev, int id, unsigned long base, u32 module, struct clk *clk_ipu); void ipu_csi_exit(struct ipu_soc *ipu, int id); +int ipu_ic_init(struct ipu_soc *ipu, struct device *dev, + unsigned long base, unsigned long tpmem_base); +void ipu_ic_exit(struct ipu_soc *ipu); + int ipu_di_init(struct ipu_soc *ipu, struct device *dev, int id, unsigned long base, u32 module, struct clk *ipu_clk); void ipu_di_exit(struct ipu_soc *ipu, int id); diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 6d254275192b..a477814a03af 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -71,6 +71,20 @@ enum ipu_csi_dest { IPU_CSI_DEST_VDIC, /* to VDIC */ }; +/* + * Enumeration of IPU rotation modes + */ +enum ipu_rotate_mode { + IPU_ROTATE_NONE = 0, + IPU_ROTATE_VERT_FLIP, + IPU_ROTATE_HORIZ_FLIP, + IPU_ROTATE_180, + IPU_ROTATE_90_RIGHT, + IPU_ROTATE_90_RIGHT_VFLIP, + IPU_ROTATE_90_RIGHT_HFLIP, + IPU_ROTATE_90_LEFT, +}; + enum ipu_color_space { IPUV3_COLORSPACE_RGB, IPUV3_COLORSPACE_YUV, @@ -242,6 +256,37 @@ struct ipu_csi *ipu_csi_get(struct ipu_soc *ipu, int id); void ipu_csi_put(struct ipu_csi *csi); void ipu_csi_dump(struct ipu_csi *csi); +/* + * IPU Image Converter (ic) functions + */ +enum ipu_ic_task { + IC_TASK_ENCODER, + IC_TASK_VIEWFINDER, + IC_TASK_POST_PROCESSOR, + IC_NUM_TASKS, +}; + +struct ipu_ic; +int ipu_ic_task_init(struct ipu_ic *ic, + int in_width, int in_height, + int out_width, int out_height, + enum ipu_color_space in_cs, + enum ipu_color_space out_cs); +int ipu_ic_task_graphics_init(struct ipu_ic *ic, + enum ipu_color_space in_g_cs, + bool galpha_en, u32 galpha, + bool colorkey_en, u32 colorkey); +void ipu_ic_task_enable(struct ipu_ic *ic); +void ipu_ic_task_disable(struct ipu_ic *ic); +int 
ipu_ic_task_idma_init(struct ipu_ic *ic, struct ipuv3_channel *channel, + u32 width, u32 height, int burst_size, + enum ipu_rotate_mode rot); +int ipu_ic_enable(struct ipu_ic *ic); +int ipu_ic_disable(struct ipu_ic *ic); +struct ipu_ic *ipu_ic_get(struct ipu_soc *ipu, enum ipu_ic_task task); +void ipu_ic_put(struct ipu_ic *ic); +void ipu_ic_dump(struct ipu_ic *ic); + /* * IPU Sensor Multiple FIFO Controller (SMFC) functions */ -- GitLab From fc4353559e587f5962f22c24ca7e015bdbea1e49 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:33 -0700 Subject: [PATCH 0324/1868] gpu: ipu-v3: smfc: Move enable/disable to ipu-smfc.c Move the SMFC module enable/disable helpers into the ipu-smfc submodule. Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 12 ------------ drivers/gpu/ipu-v3/ipu-smfc.c | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 312eef6ffcad..f5a4e1ac2b50 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -217,18 +217,6 @@ int ipu_module_disable(struct ipu_soc *ipu, u32 mask) } EXPORT_SYMBOL_GPL(ipu_module_disable); -int ipu_smfc_enable(struct ipu_soc *ipu) -{ - return ipu_module_enable(ipu, IPU_CONF_SMFC_EN); -} -EXPORT_SYMBOL_GPL(ipu_smfc_enable); - -int ipu_smfc_disable(struct ipu_soc *ipu) -{ - return ipu_module_disable(ipu, IPU_CONF_SMFC_EN); -} -EXPORT_SYMBOL_GPL(ipu_smfc_disable); - int ipu_idmac_get_current_buffer(struct ipuv3_channel *channel) { struct ipu_soc *ipu = channel->ipu; diff --git a/drivers/gpu/ipu-v3/ipu-smfc.c b/drivers/gpu/ipu-v3/ipu-smfc.c index e4f85ad286fc..87ac624dd7ca 100644 --- a/drivers/gpu/ipu-v3/ipu-smfc.c +++ b/drivers/gpu/ipu-v3/ipu-smfc.c @@ -71,6 +71,18 @@ int ipu_smfc_map_channel(struct ipu_soc *ipu, int channel, int csi_id, int mipi_ } EXPORT_SYMBOL_GPL(ipu_smfc_map_channel); +int ipu_smfc_enable(struct ipu_soc 
*ipu) +{ + return ipu_module_enable(ipu, IPU_CONF_SMFC_EN); +} +EXPORT_SYMBOL_GPL(ipu_smfc_enable); + +int ipu_smfc_disable(struct ipu_soc *ipu) +{ + return ipu_module_disable(ipu, IPU_CONF_SMFC_EN); +} +EXPORT_SYMBOL_GPL(ipu_smfc_disable); + int ipu_smfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base) { -- GitLab From 7fafa8f06f9bdf32b806b4612bfe387de8e34125 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:34 -0700 Subject: [PATCH 0325/1868] gpu: ipu-v3: smfc: Convert to per-channel Convert the smfc object to be specific to a single smfc channel. Add ipu_smfc_{get|put} to retrieve and release a single smfc channel for exclusive use, and add use counter to ipu_smfc_{enable|disable}. Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-smfc.c | 132 +++++++++++++++++++++++++++------- include/video/imx-ipu-v3.h | 10 +-- 2 files changed, 112 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-smfc.c b/drivers/gpu/ipu-v3/ipu-smfc.c index 87ac624dd7ca..a6429ca913c1 100644 --- a/drivers/gpu/ipu-v3/ipu-smfc.c +++ b/drivers/gpu/ipu-v3/ipu-smfc.c @@ -21,9 +21,18 @@ #include "ipu-prv.h" +struct ipu_smfc { + struct ipu_smfc_priv *priv; + int chno; + bool inuse; +}; + struct ipu_smfc_priv { void __iomem *base; spinlock_t lock; + struct ipu_soc *ipu; + struct ipu_smfc channel[4]; + int use_count; }; /*SMFC Registers */ @@ -31,75 +40,146 @@ struct ipu_smfc_priv { #define SMFC_WMC 0x0004 #define SMFC_BS 0x0008 -int ipu_smfc_set_burstsize(struct ipu_soc *ipu, int channel, int burstsize) +int ipu_smfc_set_burstsize(struct ipu_smfc *smfc, int burstsize) { - struct ipu_smfc_priv *smfc = ipu->smfc_priv; + struct ipu_smfc_priv *priv = smfc->priv; unsigned long flags; u32 val, shift; - spin_lock_irqsave(&smfc->lock, flags); + spin_lock_irqsave(&priv->lock, flags); - shift = channel * 4; - val = readl(smfc->base + SMFC_BS); + shift = smfc->chno * 4; + val = readl(priv->base + SMFC_BS); val &= 
~(0xf << shift); val |= burstsize << shift; - writel(val, smfc->base + SMFC_BS); + writel(val, priv->base + SMFC_BS); - spin_unlock_irqrestore(&smfc->lock, flags); + spin_unlock_irqrestore(&priv->lock, flags); return 0; } EXPORT_SYMBOL_GPL(ipu_smfc_set_burstsize); -int ipu_smfc_map_channel(struct ipu_soc *ipu, int channel, int csi_id, int mipi_id) +int ipu_smfc_map_channel(struct ipu_smfc *smfc, int csi_id, int mipi_id) { - struct ipu_smfc_priv *smfc = ipu->smfc_priv; + struct ipu_smfc_priv *priv = smfc->priv; unsigned long flags; u32 val, shift; - spin_lock_irqsave(&smfc->lock, flags); + spin_lock_irqsave(&priv->lock, flags); - shift = channel * 3; - val = readl(smfc->base + SMFC_MAP); + shift = smfc->chno * 3; + val = readl(priv->base + SMFC_MAP); val &= ~(0x7 << shift); val |= ((csi_id << 2) | mipi_id) << shift; - writel(val, smfc->base + SMFC_MAP); + writel(val, priv->base + SMFC_MAP); - spin_unlock_irqrestore(&smfc->lock, flags); + spin_unlock_irqrestore(&priv->lock, flags); return 0; } EXPORT_SYMBOL_GPL(ipu_smfc_map_channel); -int ipu_smfc_enable(struct ipu_soc *ipu) +int ipu_smfc_enable(struct ipu_smfc *smfc) { - return ipu_module_enable(ipu, IPU_CONF_SMFC_EN); + struct ipu_smfc_priv *priv = smfc->priv; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + if (!priv->use_count) + ipu_module_enable(priv->ipu, IPU_CONF_SMFC_EN); + + priv->use_count++; + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; } EXPORT_SYMBOL_GPL(ipu_smfc_enable); -int ipu_smfc_disable(struct ipu_soc *ipu) +int ipu_smfc_disable(struct ipu_smfc *smfc) { - return ipu_module_disable(ipu, IPU_CONF_SMFC_EN); + struct ipu_smfc_priv *priv = smfc->priv; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + priv->use_count--; + + if (!priv->use_count) + ipu_module_disable(priv->ipu, IPU_CONF_SMFC_EN); + + if (priv->use_count < 0) + priv->use_count = 0; + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; } EXPORT_SYMBOL_GPL(ipu_smfc_disable); 
+struct ipu_smfc *ipu_smfc_get(struct ipu_soc *ipu, unsigned int chno) +{ + struct ipu_smfc_priv *priv = ipu->smfc_priv; + struct ipu_smfc *smfc, *ret; + unsigned long flags; + + if (chno >= 4) + return ERR_PTR(-EINVAL); + + smfc = &priv->channel[chno]; + ret = smfc; + + spin_lock_irqsave(&priv->lock, flags); + + if (smfc->inuse) { + ret = ERR_PTR(-EBUSY); + goto unlock; + } + + smfc->inuse = true; +unlock: + spin_unlock_irqrestore(&priv->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(ipu_smfc_get); + +void ipu_smfc_put(struct ipu_smfc *smfc) +{ + struct ipu_smfc_priv *priv = smfc->priv; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + smfc->inuse = false; + spin_unlock_irqrestore(&priv->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_smfc_put); + int ipu_smfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base) { - struct ipu_smfc_priv *smfc; + struct ipu_smfc_priv *priv; + int i; - smfc = devm_kzalloc(dev, sizeof(*smfc), GFP_KERNEL); - if (!smfc) + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) return -ENOMEM; - ipu->smfc_priv = smfc; - spin_lock_init(&smfc->lock); + ipu->smfc_priv = priv; + spin_lock_init(&priv->lock); + priv->ipu = ipu; - smfc->base = devm_ioremap(dev, base, PAGE_SIZE); - if (!smfc->base) + priv->base = devm_ioremap(dev, base, PAGE_SIZE); + if (!priv->base) return -ENOMEM; - pr_debug("%s: ioremap 0x%08lx -> %p\n", __func__, base, smfc->base); + for (i = 0; i < 4; i++) { + priv->channel[i].priv = priv; + priv->channel[i].chno = i; + } + + pr_debug("%s: ioremap 0x%08lx -> %p\n", __func__, base, priv->base); return 0; } diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index a477814a03af..a695ee83e4e1 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -290,10 +290,12 @@ void ipu_ic_dump(struct ipu_ic *ic); /* * IPU Sensor Multiple FIFO Controller (SMFC) functions */ -int ipu_smfc_enable(struct ipu_soc *ipu); -int ipu_smfc_disable(struct ipu_soc *ipu); -int 
ipu_smfc_map_channel(struct ipu_soc *ipu, int channel, int csi_id, int mipi_id); -int ipu_smfc_set_burstsize(struct ipu_soc *ipu, int channel, int burstsize); +struct ipu_smfc *ipu_smfc_get(struct ipu_soc *ipu, unsigned int chno); +void ipu_smfc_put(struct ipu_smfc *smfc); +int ipu_smfc_enable(struct ipu_smfc *smfc); +int ipu_smfc_disable(struct ipu_smfc *smfc); +int ipu_smfc_map_channel(struct ipu_smfc *smfc, int csi_id, int mipi_id); +int ipu_smfc_set_burstsize(struct ipu_smfc *smfc, int burstsize); enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc); enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat); -- GitLab From a2be35e3320b27c84488729e9fb56a62e74d65fa Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:35 -0700 Subject: [PATCH 0326/1868] gpu: ipu-v3: smfc: Add ipu_smfc_set_watermark() Adds ipu_smfc_set_watermark() which programs a channel's SMFC FIFO levels at which the watermark signal is set and cleared. Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-smfc.c | 20 ++++++++++++++++++++ include/video/imx-ipu-v3.h | 1 + 2 files changed, 21 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-smfc.c b/drivers/gpu/ipu-v3/ipu-smfc.c index a6429ca913c1..6ca9b43ce25a 100644 --- a/drivers/gpu/ipu-v3/ipu-smfc.c +++ b/drivers/gpu/ipu-v3/ipu-smfc.c @@ -80,6 +80,26 @@ int ipu_smfc_map_channel(struct ipu_smfc *smfc, int csi_id, int mipi_id) } EXPORT_SYMBOL_GPL(ipu_smfc_map_channel); +int ipu_smfc_set_watermark(struct ipu_smfc *smfc, u32 set_level, u32 clr_level) +{ + struct ipu_smfc_priv *priv = smfc->priv; + unsigned long flags; + u32 val, shift; + + spin_lock_irqsave(&priv->lock, flags); + + shift = smfc->chno * 6 + (smfc->chno > 1 ? 
4 : 0); + val = readl(priv->base + SMFC_WMC); + val &= ~(0x3f << shift); + val |= ((clr_level << 3) | set_level) << shift; + writel(val, priv->base + SMFC_WMC); + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_smfc_set_watermark); + int ipu_smfc_enable(struct ipu_smfc *smfc) { struct ipu_smfc_priv *priv = smfc->priv; diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index a695ee83e4e1..49e5954ac033 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -296,6 +296,7 @@ int ipu_smfc_enable(struct ipu_smfc *smfc); int ipu_smfc_disable(struct ipu_smfc *smfc); int ipu_smfc_map_channel(struct ipu_smfc *smfc, int csi_id, int mipi_id); int ipu_smfc_set_burstsize(struct ipu_smfc *smfc, int burstsize); +int ipu_smfc_set_watermark(struct ipu_smfc *smfc, u32 set_level, u32 clr_level); enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc); enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat); -- GitLab From ae0e9708b30b3eebe5a58e4d055eb49a73d641dd Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:36 -0700 Subject: [PATCH 0327/1868] gpu: ipu-v3: Add ipu_mbus_code_to_colorspace() Add ipu_mbus_code_to_colorspace() to find ipu_color_space from a media bus pixel format code. 
Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 13 +++++++++++++ include/video/imx-ipu-v3.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index f5a4e1ac2b50..49ee990b4d1f 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -101,6 +101,19 @@ enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat) } EXPORT_SYMBOL_GPL(ipu_pixelformat_to_colorspace); +enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code) +{ + switch (mbus_code & 0xf000) { + case 0x1000: + return IPUV3_COLORSPACE_RGB; + case 0x2000: + return IPUV3_COLORSPACE_YUV; + default: + return IPUV3_COLORSPACE_UNKNOWN; + } +} +EXPORT_SYMBOL_GPL(ipu_mbus_code_to_colorspace); + struct ipuv3_channel *ipu_idmac_get(struct ipu_soc *ipu, unsigned num) { struct ipuv3_channel *channel; diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 49e5954ac033..7c97ccaf39f6 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -300,6 +300,7 @@ int ipu_smfc_set_watermark(struct ipu_smfc *smfc, u32 set_level, u32 clr_level); enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc); enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat); +enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code); struct ipu_client_platformdata { int csi; -- GitLab From f835f386a119c3f78f5acb93e86a4f025211739a Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:37 -0700 Subject: [PATCH 0328/1868] gpu: ipu-v3: Add rotation mode conversion utilities Add two functions: - ipu_degrees_to_rot_mode(): converts a rotation in degrees, plus hflip and vflip settings, to an IPU rotation mode. - ipu_rot_mode_to_degrees(): converts an IPU rotation mode with the given hflip and vflip settings to degrees.
Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 64 +++++++++++++++++++++++++++++++++ include/video/imx-ipu-v3.h | 4 +++ 2 files changed, 68 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 49ee990b4d1f..a1d42eed5d06 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -114,6 +114,70 @@ enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code) } EXPORT_SYMBOL_GPL(ipu_mbus_code_to_colorspace); +int ipu_degrees_to_rot_mode(enum ipu_rotate_mode *mode, int degrees, + bool hflip, bool vflip) +{ + u32 r90, vf, hf; + + switch (degrees) { + case 0: + vf = hf = r90 = 0; + break; + case 90: + vf = hf = 0; + r90 = 1; + break; + case 180: + vf = hf = 1; + r90 = 0; + break; + case 270: + vf = hf = r90 = 1; + break; + default: + return -EINVAL; + } + + hf ^= (u32)hflip; + vf ^= (u32)vflip; + + *mode = (enum ipu_rotate_mode)((r90 << 2) | (hf << 1) | vf); + return 0; +} +EXPORT_SYMBOL_GPL(ipu_degrees_to_rot_mode); + +int ipu_rot_mode_to_degrees(int *degrees, enum ipu_rotate_mode mode, + bool hflip, bool vflip) +{ + u32 r90, vf, hf; + + r90 = ((u32)mode >> 2) & 0x1; + hf = ((u32)mode >> 1) & 0x1; + vf = ((u32)mode >> 0) & 0x1; + hf ^= (u32)hflip; + vf ^= (u32)vflip; + + switch ((enum ipu_rotate_mode)((r90 << 2) | (hf << 1) | vf)) { + case IPU_ROTATE_NONE: + *degrees = 0; + break; + case IPU_ROTATE_90_RIGHT: + *degrees = 90; + break; + case IPU_ROTATE_180: + *degrees = 180; + break; + case IPU_ROTATE_90_LEFT: + *degrees = 270; + break; + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_rot_mode_to_degrees); + struct ipuv3_channel *ipu_idmac_get(struct ipu_soc *ipu, unsigned num) { struct ipuv3_channel *channel; diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 7c97ccaf39f6..3562698528bd 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -301,6 +301,10 @@ int 
ipu_smfc_set_watermark(struct ipu_smfc *smfc, u32 set_level, u32 clr_level); enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc); enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat); enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code); +int ipu_degrees_to_rot_mode(enum ipu_rotate_mode *mode, int degrees, + bool hflip, bool vflip); +int ipu_rot_mode_to_degrees(int *degrees, enum ipu_rotate_mode mode, + bool hflip, bool vflip); struct ipu_client_platformdata { int csi; -- GitLab From 4cea940d34319fb5d5e2f4d554e23f766c228e90 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:38 -0700 Subject: [PATCH 0329/1868] gpu: ipu-v3: Add helper function checking if pixfmt is planar Add a simple helper function that returns true if the passed pixel format is one of the supported planar formats. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 12 ++++++++++++ include/video/imx-ipu-v3.h | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index a1d42eed5d06..18563c240f10 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -101,6 +101,18 @@ enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat) } EXPORT_SYMBOL_GPL(ipu_pixelformat_to_colorspace); +bool ipu_pixelformat_is_planar(u32 pixelformat) +{ + switch (pixelformat) { + case V4L2_PIX_FMT_YUV420: + case V4L2_PIX_FMT_YVU420: + return true; + } + + return false; +} +EXPORT_SYMBOL_GPL(ipu_pixelformat_is_planar); + enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code) { switch (mbus_code & 0xf000) { diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 3562698528bd..ecb01f843aee 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -301,6 +301,7 @@ int ipu_smfc_set_watermark(struct ipu_smfc *smfc, u32 set_level, u32 clr_level); enum ipu_color_space
ipu_drm_fourcc_to_colorspace(u32 drm_fourcc); enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat); enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code); +bool ipu_pixelformat_is_planar(u32 pixelformat); int ipu_degrees_to_rot_mode(enum ipu_rotate_mode *mode, int degrees, bool hflip, bool vflip); int ipu_rot_mode_to_degrees(int *degrees, enum ipu_rotate_mode mode, -- GitLab From a4cd8f229ff71db0c95c0d96381d4fb9239fdb19 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:39 -0700 Subject: [PATCH 0330/1868] gpu: ipu-v3: Move IDMAC channel names to imx-ipu-v3.h Move the IDMAC channel names to imx-ipu-v3.h, to make the names available outside IPU. Add a couple new channels in the process (async display BG/FG, channels 24 and 29). Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-prv.h | 25 ------------------------- include/video/imx-ipu-v3.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h index 1596a4f52faf..7f08a461c929 100644 --- a/drivers/gpu/ipu-v3/ipu-prv.h +++ b/drivers/gpu/ipu-v3/ipu-prv.h @@ -24,31 +24,6 @@ struct ipu_soc; #include